|
[ |
|
{ |
|
"sentence": "Typical generative model approaches include naive Bayes classifier s , Gaussian mixture model s , variational autoencoders and others .", |
|
"entity_list": [ |
|
{ |
|
"name": "naive Bayes classifier", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Gaussian mixture model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "variational autoencoders", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Finally , every other year , ELRA organizes a major conference LREC , the International Language Resources and Evaluation Conference .", |
|
"entity_list": [ |
|
{ |
|
"name": "ELRA", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "LREC", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "International Language Resources and Evaluation Conference", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The task is usually to derive the maximum likelihood estimate of the parameters of the HMM given the of output sequences .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood estimate", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "HMM", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Unlike neural network s and Support vector machine , the AdaBoost training process selects only those features known to improve the predictive power of the model , reducing dimensionality and potentially improving execution time as irrelevant features need not be computed .", |
|
"entity_list": [ |
|
{ |
|
"name": "neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Support vector machine", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "AdaBoost", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Troponymy is one of the possible relations between verb s in the semantic network of the WordNet database .", |
|
"entity_list": [ |
|
{ |
|
"name": "Troponymy", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "semantic network", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "WordNet database", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A frame language is a technology used for knowledge representation in artificial intelligence .", |
|
"entity_list": [ |
|
{ |
|
"name": "knowledge representation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "NIST also differs from Bilingual evaluation understudy in its calculation of the brevity penalty insofar as small variations in translation length do not impact the overall score as much .", |
|
"entity_list": [ |
|
{ |
|
"name": "NIST", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Bilingual evaluation understudy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "brevity penalty", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The model is initially fit on a training dataset , The model ( e.g. a neural net or a naive Bayes classifier ) is trained on the training dataset using a supervised learning method , for example using optimization methods such as gradient descent or stochastic gradient descent .", |
|
"entity_list": [ |
|
{ |
|
"name": "neural net", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "naive Bayes classifier", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "gradient descent", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "stochastic gradient descent", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "FrameNet has been used in applications like question answering , paraphrasing , recognizing textual entailment , and information extraction , either directly or by means of Semantic Role Labeling tools .", |
|
"entity_list": [ |
|
{ |
|
"name": "FrameNet", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "question answering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "paraphrasing", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "recognizing textual entailment", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "information extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Semantic Role Labeling", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This would include programs such as data analysis and extraction tools , spreadsheets ( e.g. Excel ) , databases ( e.g. Access ) , statistical analysis ( e.g. SAS ) , generalized audit software ( e.g. ACL , Arbutus , EAS ) , business intelligence ( e.g. Crystal Reports and Business Objects ) , etc .", |
|
"entity_list": [ |
|
{ |
|
"name": "data analysis", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "spreadsheets", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Excel", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "databases", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Access", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "statistical analysis", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "SAS", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "generalized audit software", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "ACL", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Arbutus", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "EAS", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "business intelligence", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Crystal Reports", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Business Objects", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Rethink Robotics - founded by Rodney Brooks , previously with iRobot - introduced Baxter in September 2012 ; as an industrial robot designed to safely interact with neighboring human workers , and be programmable for performing simple tasks .", |
|
"entity_list": [ |
|
{ |
|
"name": "Rethink Robotics", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Rodney Brooks", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "iRobot", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Baxter", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Typical text mining tasks include text categorization , text clustering , concept / entity extraction , production of granular taxonomies , sentiment analysis , document summarization , and entity relation modeling ( i.e. , learning relations between named entity recognition ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "text mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "text categorization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "text clustering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "concept / entity extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "production of granular taxonomies", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "sentiment analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "document summarization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "entity relation modeling", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "named entity recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Nonetheless , stemming reduces precision , or TRUE negative rate , for such systems .", |
|
"entity_list": [ |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE negative rate", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A special case of keyword spotting is wake word ( also called hot word ) detection used by personal digital assistants such as Alexa or Siri to wake up when their name is spoken .", |
|
"entity_list": [ |
|
{ |
|
"name": "keyword spotting", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "wake word", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "hot word", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Alexa", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Siri", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Prova is an open source programming language that combines Prolog with Java .", |
|
"entity_list": [ |
|
{ |
|
"name": "Prova", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Prolog", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1987 , Tocibai Machine , a subsidiary of Toshiba , was accused of illegally selling CNC milling s used to produce very quiet submarine propellers to the Soviet Union in violation of the CoCom agreement , an international embargo on certain countries to COMECON countries .", |
|
"entity_list": [ |
|
{ |
|
"name": "Tocibai Machine", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Toshiba", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "CNC milling", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Soviet Union", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "CoCom", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "COMECON", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Engelberger 's most famous co-invention , the Unimate industrial robotic arm , was among the first inductees into the Robot Hall of Fame in 2003 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Engelberger", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Unimate industrial robotic arm", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Robot Hall of Fame", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Originally controlled via static html web pages using CGI , work by Dalton saw the introduction of an augmented reality Java -based interface that met with limited success .", |
|
"entity_list": [ |
|
{ |
|
"name": "static html", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "CGI", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Dalton", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "augmented reality", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The first publication about the LMF specification as it has been ratified by ISO ( this paper became ( in 2015 ) the 9th most cited paper within the LREC conferences from LREC papers ) :", |
|
"entity_list": [ |
|
{ |
|
"name": "LMF specification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "ISO", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "LREC", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A confusion matrix or matching matrix is often used as a tool to validate the accuracy of k -NN classification .", |
|
"entity_list": [ |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "k -NN classification", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Decision tree learning is one of the predictive modeling approaches used in statistics , data mining and machine learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "Decision tree", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "data mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "At runtime , the target prosody of a sentence is superimposed on these minimal units by means of signal processing techniques such as linear predictive coding , PSOLA", |
|
"entity_list": [ |
|
{ |
|
"name": "prosody", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "signal processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "linear predictive coding", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This approach utilized artificial intelligence and machine learning to allow researchers to visibly compare conventional and thermal facial imagery .", |
|
"entity_list": [ |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "facial imagery", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In computer science , evolutionary computation is a family of algorithms for global optimization inspired by biological evolution , and the subfield of artificial intelligence and soft computing studying these algorithms .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "evolutionary computation", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "global optimization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "biological evolution", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "soft computing", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For instance , one can combine some measure based on the confusion matrix with the mean squared error evaluated between the raw model outputs and the actual values .", |
|
"entity_list": [ |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The majority are results of the word2vec model developed by Mikolov et al or variants of word2vec .", |
|
"entity_list": [ |
|
{ |
|
"name": "word2vec model", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Mikolov", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "word2vec", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It was during this time that a total of 43 publications were recognized by the CVPR and the International Conference on Computer Vision ( ICCV ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "International Conference on Computer Vision", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ICCV", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The AIBO has seen much use as an inexpensive platform for artificial intelligence education and research , because integrates a computer , Computer vision , and articulators in a package vastly cheaper than conventional research robots .", |
|
"entity_list": [ |
|
{ |
|
"name": "AIBO", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Computer vision", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "She served as Program Chair of International Conference on Computer Vision 2021 .", |
|
"entity_list": [ |
|
{ |
|
"name": "International Conference on Computer Vision 2021", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Scheinman , after receiving a fellowship from Unimation to develop his designs , sold those designs to Unimation who further developed them with support from General Motors and later marketed it as the Programmable Universal Machine for Assembly ( PUMA ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Scheinman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Unimation", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "General Motors", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Programmable Universal Machine for Assembly", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "PUMA", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An overview of calibration methods for binary classification and multiclass classification classification tasks is given by Gebel ( 2009 )", |
|
"entity_list": [ |
|
{ |
|
"name": "binary classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "multiclass classification classification tasks", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Gebel", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He is involved in fields such as optical character recognition ( OCR ) , speech synthesis , speech recognition technology , and electronic keyboard instruments .", |
|
"entity_list": [ |
|
{ |
|
"name": "optical character recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "OCR", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech synthesis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For more recent and state-of-the-art techniques , Kaldi toolkit can be used .", |
|
"entity_list": [ |
|
{ |
|
"name": "Kaldi toolkit", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Johnson-Laird is a Fellow of the American Philosophical Society , a Fellow of the Royal Society , a Fellow of the British Academy , a William James Fellow of the Association for Psychological Science , and a Fellow of the Cognitive Science Society .", |
|
"entity_list": [ |
|
{ |
|
"name": "Johnson-Laird", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "American Philosophical Society", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Royal Society", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "British Academy", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "William James", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Association for Psychological Science", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Cognitive Science Society", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "At the IEEE International Conference on Image Processing in 2010 , Rui Hu , Mark Banard , and John Collomosse extended the HOG descriptor for use in sketch based image retrieval ( SBIR ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "IEEE International Conference on Image Processing", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Rui Hu", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Mark Banard", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "John Collomosse", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "HOG descriptor", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "sketch based image retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "SBIR", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "BLEU uses a modified form of precision to compare a candidate translation against multiple reference translations .", |
|
"entity_list": [ |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For the case of a general base space math ( Y , \\ mathcal { B } , \\ nu ) / math ( i.e. a base space which is not countable ) , one typically considers the relative entropy .", |
|
"entity_list": [ |
|
{ |
|
"name": "relative entropy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As of October 2011 , the already-existing partnerships with the United States ' National Park Service ( NPS ) , the United Kingdom 's Historic Scotland ( HS ) , World Monuments Fund , and Mexico 's Instituto Nacional de Antropología y Historia ( INAH ) had been greatly expanded , , CyArk website", |
|
"entity_list": [ |
|
{ |
|
"name": "United States", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "National Park Service", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "NPS", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "United Kingdom", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Historic Scotland", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "HS", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "World Monuments Fund", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Mexico", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Instituto Nacional de Antropología y Historia", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "INAH", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "CyArk", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Kernel SVMs are available in many machine-learning toolkits , including LIBSVM , MATLAB , and others .", |
|
"entity_list": [ |
|
{ |
|
"name": "Kernel SVMs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "machine-learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "LIBSVM", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The 2009 Loebner Prize Competition was held September 6 , 2009 at the Brighton Centre , Brighton UK in conjunction with the Interspeech 2009 conference .", |
|
"entity_list": [ |
|
{ |
|
"name": "Loebner Prize Competition", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Brighton Centre", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Brighton", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "UK", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Interspeech 2009 conference", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The humanoid QRIO robot was designed as the successor to AIBO , and runs the same base R-CODE Aperios operating system .", |
|
"entity_list": [ |
|
{ |
|
"name": "QRIO robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "AIBO", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "R-CODE", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Aperios operating system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Speech waveforms are generated from HMMs themselves based on the maximum likelihood criterion .", |
|
"entity_list": [ |
|
{ |
|
"name": "Speech waveforms", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "HMMs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "maximum likelihood", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Google Translate is a free multilingual statistical machine translation and neural machine translation service developed by Google , to translate text and websites from one language into another .", |
|
"entity_list": [ |
|
{ |
|
"name": "Google Translate", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "multilingual statistical machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "neural machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Google", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Skeletons are widely used in computer vision , image analysis , pattern recognition and digital image processing for purposes such as optical character recognition , fingerprint recognition , visual inspection or compression .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "image analysis", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "pattern recognition", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "digital image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "optical character recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "fingerprint recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "visual inspection or compression", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The ImageNet Large Scale Visual Recognition Challenge is a benchmark in object classification and detection , with millions of images and hundreds of object classes .", |
|
"entity_list": [ |
|
{ |
|
"name": "ImageNet Large Scale Visual Recognition Challenge", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "object classification and detection", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Bengio , together with Geoffrey Hinton and Yann LeCun , are referred to by some as the Godfathers of AI and Godfathers of Deep Learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bengio", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Geoffrey Hinton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Yann LeCun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Godfathers of AI", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Godfathers of Deep Learning", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He is a Life Fellow of IEEE .", |
|
"entity_list": [ |
|
{ |
|
"name": "IEEE", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "NSA Bethesda is responsible for base operational support for its major tenant , the Walter Reed National Military Medical Center .", |
|
"entity_list": [ |
|
{ |
|
"name": "NSA Bethesda", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Walter Reed National Military Medical Center", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The three major learning paradigms are supervised learning , unsupervised learning and reinforcement learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "unsupervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "reinforcement learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Examples include control , planning and scheduling , the ability to answer diagnostic and consumer questions , handwriting recognition , natural language understanding , speech recognition and facial recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "control", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "planning and scheduling", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "answer diagnostic and consumer questions", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "handwriting recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "natural language understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "facial recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1991 he was elected as a fellow of the Association for the Advancement of Artificial Intelligence ( 1990 , founding fellow ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for the Advancement of Artificial Intelligence", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "However , by formulating the problem as the solution of a Toeplitz matrix and using Levinson recursion , we can relatively quickly estimate a filter with the smallest mean squared error possible .", |
|
"entity_list": [ |
|
{ |
|
"name": "Toeplitz matrix", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Levinson recursion", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In July 2011 the 15th edition of Campus Party Spain will be held at the City of Arts and Sciences in Valencia .", |
|
"entity_list": [ |
|
{ |
|
"name": "15th edition of Campus Party Spain", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "City of Arts and Sciences", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Valencia", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Often this is generally only possible at the very end of complicated games such as chess or go , since it is not computationally feasible to look ahead as far as the completion of the game , except towards the end , and instead , positions are given finite values as estimates of the degree of belief that they will lead to a win for one player or another .", |
|
"entity_list": [ |
|
{ |
|
"name": "chess", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "go", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The difference between the multinomial logit model and numerous other methods , models , algorithms , etc. with the same basic setup ( the perceptron algorithm , support vector machine s , linear discriminant analysis , etc .", |
|
"entity_list": [ |
|
{ |
|
"name": "multinomial logit model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "perceptron algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "support vector machine", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "linear discriminant analysis", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Association for Computational Linguistics , published by", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for Computational Linguistics", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In computerised Facial recognition system , each face is represented by a large number of pixel values .", |
|
"entity_list": [ |
|
{ |
|
"name": "Facial recognition system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2002 , his son , Daniel Pearl , a journalist working for the Wall Street Journal was kidnapped and murdered in Pakistan , leading Judea and the other members of the family and friends to create the Daniel Pearl Foundation .", |
|
"entity_list": [ |
|
{ |
|
"name": "Daniel Pearl", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Wall Street Journal", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Pakistan", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Judea", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Daniel Pearl Foundation", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As of late 2006 , Red Envelope Entertainment also expanded into producing original content with filmmakers such as John Waters .", |
|
"entity_list": [ |
|
{ |
|
"name": "Red Envelope Entertainment", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "John Waters", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The building is now part of the Beth Israel Deaconess Medical Center .", |
|
"entity_list": [ |
|
{ |
|
"name": "Beth Israel Deaconess Medical Center", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A common theme of this work is the adoption of a sign-theoretic perspective on issues of artificial intelligence and knowledge representation .", |
|
"entity_list": [ |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "knowledge representation", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For instance , the term neural machine translation ( NMT ) emphasizes the fact that deep learning-based approaches to machine translation directly learn sequence-to-sequence transformations , obviating the need for intermediate steps such as word alignment and language modeling that was used in statistical machine translation ( SMT ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "neural machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "NMT", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "word alignment", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "language modeling", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "statistical machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "SMT", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Most research in the field of WSD is performed by using WordNet as a reference sense inventory for .", |
|
"entity_list": [ |
|
{ |
|
"name": "WSD", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Notable former PhD students and postdoctoral researchers from his group include Richard Zemel , and Zoubin Ghahramani .", |
|
"entity_list": [ |
|
{ |
|
"name": "PhD", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Richard Zemel", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Zoubin Ghahramani", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Each prediction result or instance of a confusion matrix represents one point in the ROC space .", |
|
"entity_list": [ |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "ROC", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1997 Thrun and his colleagues Wolfram Burgard and Dieter Fox developed the world 's first robotic tour guide in the Deutsches Museum Bonn ( 1997 ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Thrun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Wolfram Burgard", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Dieter Fox", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "robotic tour guide", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Deutsches Museum Bonn", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "WordNet is a lexical database of semantic relation s between word s in more than 200 languages. its primary use is in automatic natural language processing and artificial intelligence applications .", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "semantic relation", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Conferences in the field of natural language processing , such as Association for Computational Linguistics , North American Chapter of the Association for Computational Linguistics , EMNLP , and HLT , are beginning to include papers on speech processing .", |
|
"entity_list": [ |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Association for Computational Linguistics", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "North American Chapter of the Association for Computational Linguistics", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "EMNLP", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "HLT", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "speech processing", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A set of Java programs use the lexicon to work through the variations in biomedical texts by relating words by their parts of speech , which can be helpful in web searches or searches through an electronic medical record .", |
|
"entity_list": [ |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "parts of speech", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "electronic medical record", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "There are many more recent algorithms such as LPBoost , TotalBoost , BrownBoost , xgboost , MadaBoost , , and others .", |
|
"entity_list": [ |
|
{ |
|
"name": "LPBoost", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "TotalBoost", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "BrownBoost", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "xgboost", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "MadaBoost", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This is an example implementation in Python :", |
|
"entity_list": [ |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Mattel Intellivision game console offered the Intellivoice Voice Synthesis module in 1982 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Mattel", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Intellivision", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Intellivoice Voice Synthesis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He also worked on machine translation , both high-accuracy knowledge-based MT and machine learning for Statistical machine translation ( such as generalized example-based MT ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "high-accuracy knowledge-based MT", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Statistical machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "generalized example-based MT", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Wolfram Mathematica ( usually termed Mathematica ) is a modern technical computing system spanning most areas of technical - including neural networks , machine learning , image processing , geometry , data science , visualizations , and others .", |
|
"entity_list": [ |
|
{ |
|
"name": "Wolfram Mathematica", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Mathematica", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "neural networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "geometry", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "data science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "visualizations", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The first digitally operated and programmable robot was invented by George Devol in 1954 and was ultimately called the Unimate .", |
|
"entity_list": [ |
|
{ |
|
"name": "digitally operated and programmable robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "George Devol", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Unimate", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Like DBNs , DBMs can learn complex and abstract internal representations of the input in tasks such as Object recognition or speech recognition , using limited , labeled data to fine-tune the representations built using a large set of unlabeled sensory input data .", |
|
"entity_list": [ |
|
{ |
|
"name": "DBNs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "DBMs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Object recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Scientific conferences where vision based activity recognition work often appears are ICCV and CVPR .", |
|
"entity_list": [ |
|
{ |
|
"name": "vision based activity recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "ICCV", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In statistics , an expectation-maximization ( EM ) algorithm is an iterative method to find maximum likelihood or maximum a posteriori ( MAP ) estimates of parameter s in statistical model s , where the model depends on unobserved latent variable s .", |
|
"entity_list": [ |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "expectation-maximization", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "EM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "maximum likelihood", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "maximum a posteriori", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "MAP", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "latent variable", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Similarly , investigators sometimes report the FALSE Positive Rate ( FPR ) as well as the FALSE Negative Rate ( FNR ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "FALSE Positive Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FPR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE Negative Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FNR", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The concept is similar to the signal to noise ratio used in the sciences and confusion matrix used in artificial intelligence .", |
|
"entity_list": [ |
|
{ |
|
"name": "signal to noise ratio", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "sciences", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Code of Ethics on Human Augmentation , which was originally introduced by Steve Mann in 2004 and refined with Ray Kurzweil and Marvin Minsky in 2013 , was ultimately ratified at the Virtual Reality Toronto conference on June 25 , 2017 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Human Augmentation", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Steve Mann", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Ray Kurzweil", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Marvin Minsky", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Virtual Reality Toronto conference", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1913 , Walter R. Booth directed 10 films for the U.K. Kinoplastikon , presumably in collaboration with Cecil Hepworth .", |
|
"entity_list": [ |
|
{ |
|
"name": "Walter R. Booth", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "U.K. Kinoplastikon", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Cecil Hepworth", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "They introduced their new robot in 1961 at a trade show at Chicago 's Cow Palace .", |
|
"entity_list": [ |
|
{ |
|
"name": "Chicago", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Cow Palace", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "While some chatbot applications use extensive word-classification processes , natural language processing processors , and sophisticated Artificial intelligence , others simply scan for general keywords and generate responses using common phrases obtained from an associated library or database .", |
|
"entity_list": [ |
|
{ |
|
"name": "chatbot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "word-classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The WaveNet model proposed in 2016 achieves great performance on speech quality .", |
|
"entity_list": [ |
|
{ |
|
"name": "WaveNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Organizations known to use ALE for Emergency management , disaster relief , ordinary communication or extraordinary situation response : American Red Cross , FEMA , Disaster Medical Assistance Team s , NATO , Federal Bureau of Investigation , United Nations , AT & T , Civil Air Patrol , ( ARES ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "ALE", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Emergency management", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "disaster relief", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "ordinary communication", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "extraordinary situation response", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "American Red Cross", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "FEMA", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Disaster Medical Assistance Team", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "NATO", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Federal Bureau of Investigation", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "United Nations", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "AT & T", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Civil Air Patrol", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "ARES", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Here , the Kronecker delta is used for simplicity ( cf. the derivative of a sigmoid function , being expressed via the function itself ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Kronecker delta", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "sigmoid function", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The theory is based in philosophical foundations , and was founded by Ray Solomonoff around 1960 . Samuel Rathmanner and Marcus Hutter .", |
|
"entity_list": [ |
|
{ |
|
"name": "Ray Solomonoff", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Samuel Rathmanner", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Marcus Hutter", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "WordNet , a freely available database originally designed as a semantic network based on psycholinguistic principles , was expanded by addition of definitions and is now also viewed as a dictionary .", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "semantic network", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "psycholinguistic principles", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Advances in the field of computational imaging research is presented in several venues including publications of SIGGRAPH and the .", |
|
"entity_list": [ |
|
{ |
|
"name": "computational imaging", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "SIGGRAPH", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Classification can be thought of as two separate problems - binary classification and multiclass classification .", |
|
"entity_list": [ |
|
{ |
|
"name": "Classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "binary classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "multiclass classification", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Advanced gene finders for both prokaryotic and eukaryotic genomes typically use complex probabilistic model s , such as hidden Markov model s ( HMMs ) to combine information from a variety of different signal and content measurements .", |
|
"entity_list": [ |
|
{ |
|
"name": "probabilistic model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "HMMs", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Neuroevolution , or neuro-evolution , is a form of artificial intelligence that uses evolutionary algorithm s to generate artificial neural network s ( ANN ) , parameters , topology and rules. and evolutionary robotics .", |
|
"entity_list": [ |
|
{ |
|
"name": "Neuroevolution", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "neuro-evolution", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "evolutionary algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "artificial neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "ANN", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "evolutionary robotics", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Since IBM proposed and realized the system of BLEU Papineni et al .", |
|
"entity_list": [ |
|
{ |
|
"name": "IBM", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Papineni", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2009 , experts attended a conference hosted by the Association for the Advancement of Artificial Intelligence ( AAAI ) to discuss whether computers and robots might be able to acquire any autonomy , and how much these abilities might pose a threat or hazard .", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for the Advancement of Artificial Intelligence", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "AAAI", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "After boosting , a classifier constructed from 200 features could yield a 95 % detection rate under a ^ { -5 } / math FALSE positive rate .P. Viola , M. Jones , Robust Real-time Object Detection , 2001 .", |
|
"entity_list": [ |
|
{ |
|
"name": "FALSE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": ".P. Viola", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "M. Jones", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Robust Real-time Object Detection", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The website was originally Perl -based , but IMDb no longer discloses what software it uses for reasons of security .", |
|
"entity_list": [ |
|
{ |
|
"name": "Perl", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "IMDb", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The start-up was founded by Demis Hassabis , Shane Legg and Mustafa Suleyman in 2010 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Demis Hassabis", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Shane Legg", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Mustafa Suleyman", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Two very commonly used loss functions are the mean squared error , mathL ( a ) = a ^ 2 / math , and the absolute loss , mathL ( a ) = | a | / math .", |
|
"entity_list": [ |
|
{ |
|
"name": "loss functions", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "absolute loss", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The soft-margin support vector machine described above is an example of an empirical risk minimization ( ERM ) for the hinge loss .", |
|
"entity_list": [ |
|
{ |
|
"name": "support vector machine", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "empirical risk minimization", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "ERM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "hinge loss", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A deep learning based approach to MT , neural machine translation has made rapid progress in recent years , and Google has announced its translation services are now using this technology in preference to its previous statistical methods .", |
|
"entity_list": [ |
|
{ |
|
"name": "deep learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "MT", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "neural machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Google", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This tends to yield very large performance gains when working with large corpora such as WordNet .", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Face detection is used in biometrics , often as a part of ( or together with ) a facial recognition system .", |
|
"entity_list": [ |
|
{ |
|
"name": "Face detection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "biometrics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "facial recognition system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "trained by maximum likelihood estimation .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood estimation", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": ", Ltd. in Thailand ; Komatsu ( Shanghai ) Ltd. in 1996 in Shanghai , China ; Industrial Power Alliance Ltd. in Japan , a joint venture with Cummins , in 1998 ; L & T-Komatsu Limited in India in 1998 ( shares sold in 2013 ) ; and Komatsu Brasil International Ltda. in Brazil in 1998 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Thailand", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Komatsu ( Shanghai ) Ltd.", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Shanghai", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "China", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Industrial Power Alliance Ltd.", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Japan", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Cummins", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "L & T-Komatsu Limited", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "India", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Komatsu Brasil International Ltda.", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Brazil", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "dgp also occasionally hosts artists in residence ( e.g. , Oscar -winner Chris Landreth .", |
|
"entity_list": [ |
|
{ |
|
"name": "dgp", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "artists in residence", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Oscar", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Chris Landreth", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It currently includes four sub-competitions - the RoboMaster Robotics Competition , the RoboMaster Technical Challenge , the ICRA RoboMaster AI Challenge , and the new RoboMaster Youth Tournament .", |
|
"entity_list": [ |
|
{ |
|
"name": "RoboMaster Robotics Competition", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "RoboMaster Technical Challenge", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "ICRA RoboMaster AI Challenge", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "RoboMaster Youth Tournament", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "By the early 2000s , the dominant speech processing strategy started to shift away from Hidden Markov model towards more modern neural networks and deep learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "neural networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "deep learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Another equivalent expression , in the case of a binary target rate , is that the TRUE positive rate and the FALSE positive rate are equal ( and therefore the FALSE negative rate and the TRUE negative rate are equal ) for every value of the sensitive characteristics :", |
|
"entity_list": [ |
|
{ |
|
"name": "binary target rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE negative rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE negative rate", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The MATLAB function ,", |
|
"entity_list": [ |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An articulated robot is a robot with rotary joint s ( e.g. a legged robot or an industrial robot ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "articulated robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "rotary joint", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Pandora ( also known as Pandora Media or Pandora Radio ) is an American music streaming and automated Recommender system internet radio service powered by the Music Genome Project and headquartered in Oakland , California .", |
|
"entity_list": [ |
|
{ |
|
"name": "Pandora", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Pandora Media", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Pandora Radio", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "American", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "automated Recommender system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Music Genome Project", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Oakland", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "California", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "She is a board member of the International Machine Learning Society , has been a member of AAAI Executive council , was PC co-chair of ICML 2011 , and has served as senior PC member for conferences including AAAI , ICML , IJCAI , ISWC , KDD , SIGMOD , UAI , VLDB , WSDM and WWW .", |
|
"entity_list": [ |
|
{ |
|
"name": "International Machine Learning Society", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "AAAI Executive council", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "ICML 2011", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "AAAI", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ICML", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IJCAI", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ISWC", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "KDD", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "SIGMOD", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "UAI", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "VLDB", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "WSDM", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "WWW", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "James S. Albus of the National Institute of Standards and Technology ( NIST ) developed the Robocrane , where the platform hangs from six cables instead of being supported by six jacks .", |
|
"entity_list": [ |
|
{ |
|
"name": "James S. Albus", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "National Institute of Standards and Technology", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "NIST", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Robocrane", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Another class of direct search algorithms are the various evolutionary algorithm s , e.g. genetic algorithm s .", |
|
"entity_list": [ |
|
{ |
|
"name": "direct search algorithms", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "evolutionary algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "genetic algorithm", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "KUKA is a German manufacturer of industrial robot s and solution s for factory automation .", |
|
"entity_list": [ |
|
{ |
|
"name": "KUKA", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "German", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other films between 2016 to 2020 that captured with IMAX camera 's were Zack Snyder ' s Batman v Superman : Dawn of Justice , Clint Eastwood ' s Sully , Damien Chazelle ' s First Man , Patty Jenkins ' Wonder Woman 1984 , Cary Joji Fukunaga ' s No Time to Die and Joseph Kosinski ' s Top Gun : Maverick .", |
|
"entity_list": [ |
|
{ |
|
"name": "IMAX", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Zack Snyder", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Batman v Superman : Dawn of Justice", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Clint Eastwood", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Sully", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Damien Chazelle", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "First Man", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Patty Jenkins", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Wonder Woman 1984", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Cary Joji Fukunaga", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "No Time to Die", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Joseph Kosinski", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Top Gun : Maverick", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The trial of MICR E13B font was shown to the American Bankers Association ( ABA ) in July 1956 , which adopted it in 1958 as the MICR standard for negotiable document s in the United States .", |
|
"entity_list": [ |
|
{ |
|
"name": "MICR E13B", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "American Bankers Association", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "ABA", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "MICR", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "United States", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Local search algorithms are widely applied to numerous hard computational problems , including problems from computer science ( particularly artificial intelligence ) , mathematics , operations research , engineering , and bioinformatics .", |
|
"entity_list": [ |
|
{ |
|
"name": "Local search algorithms", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "mathematics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "operations research", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "bioinformatics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Gerd Gigerenzer ( born September 3 , 1947 , Wallersdorf , Germany ) is a Germany psychologist who has studied the use of bounded rationality and heuristic s in decision making .", |
|
"entity_list": [ |
|
{ |
|
"name": "Gerd Gigerenzer", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Wallersdorf", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Germany", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "bounded rationality", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "heuristic", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "decision making", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "to minimize the Mean squared error .", |
|
"entity_list": [ |
|
{ |
|
"name": "Mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "But even an official language with a regulating academy , such as Standard French with the Académie française , is classified as a natural language ( for example , in the field of natural language processing ) , as its prescriptive points do not make it either constructed enough to be classified as a constructed language or controlled enough to be classified as a controlled natural language .", |
|
"entity_list": [ |
|
{ |
|
"name": "Standard French", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Académie française", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "constructed language", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "controlled natural language", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "There are a number of other metrics , most simply the accuracy or Fraction Correct ( FC ) , which measures the fraction of all instances that are correctly categorized ; the complement is the Fraction Incorrect ( FiC ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Fraction Correct", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FC", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Fraction Incorrect", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FiC", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Cardie became a Fellow of the Association for Computational Linguistics in 2016 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cardie", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Association for Computational Linguistics", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Learning the parameters math \\ theta / math is usually done by maximum likelihood learning for mathp ( Y _ i | X _ i ; \\ theta ) / math .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood learning", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Cluster analysis , and Non-negative matrix factorization for descriptive mining .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cluster analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Non-negative matrix factorization", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "descriptive mining", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In computer science and the information technology that it enables , it has been a long-term challenge to the ability in computers to do natural language processing and machine learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "information technology", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "( Code for Gabor feature extraction from images in MATLAB can be found at", |
|
"entity_list": [ |
|
{ |
|
"name": "Gabor feature extraction", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The NeuralExpert centers the design specifications around the type of problem the user would like the neural network to solve ( Classification , Prediction , Function approximation or Cluster analysis ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "NeuralExpert", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Prediction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Function approximation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Cluster analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "When the quantization step size ( Δ ) is small relative to the variation in the signal being quantized , it is relatively simple to show that the mean squared error produced by such a rounding operation will be approximately math \\ Delta ^ 2 / 12 / math.math", |
|
"entity_list": [ |
|
{ |
|
"name": "quantization step size", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The construction of a rich lexicon with a suitable ontology requires significant effort , e.g. , Wordnet lexicon required many person-years of effort. G. A. Miller , R. Beckwith , C. D. Fellbaum , D. Gross , K. Miller .", |
|
"entity_list": [ |
|
{ |
|
"name": "Wordnet", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "G. A. Miller", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "R. Beckwith", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "C. D. Fellbaum", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "D. Gross", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "K. Miller", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Kawasaki 's portfolio also includes retractable roofs , floors and other giant structures , the Sapporo Dome ' retractable surface is one example .", |
|
"entity_list": [ |
|
{ |
|
"name": "Kawasaki", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Sapporo Dome", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Kappa statistics such as Fleiss ' kappa and Cohen 's kappa are methods for calculating inter-rater reliability based on different assumptions about the marginal or prior distributions , and are increasingly used as chance corrected alternatives to accuracy in other contexts .", |
|
"entity_list": [ |
|
{ |
|
"name": "Kappa statistics", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Fleiss ' kappa", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Cohen 's kappa", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "inter-rater reliability", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "With his students Sepp Hochreiter , Felix Gers , Fred Cummins , Alex Graves , and others , Schmidhuber published increasingly sophisticated versions of a type of recurrent neural network called the long short-term memory ( LSTM ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Sepp Hochreiter", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Felix Gers", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Fred Cummins", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Alex Graves", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Schmidhuber", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "recurrent neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "long short-term memory", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "LSTM", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "2004 - The first Cobot KUKA LBR 3 is released .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cobot KUKA LBR 3", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Two shallow approaches used to train and then disambiguate are Naive Bayes classifier and decision trees .", |
|
"entity_list": [ |
|
{ |
|
"name": "Naive Bayes classifier", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "decision trees", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The first practical forms of photography were introduced in January 1839 by Louis Daguerre and Henry Fox Talbot .", |
|
"entity_list": [ |
|
{ |
|
"name": "photography", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Louis Daguerre", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Henry Fox Talbot", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For example , speech synthesis , combined with speech recognition , allows for interaction with mobile devices via language processing interfaces .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech synthesis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "language processing", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Phidgets can be programmed using a variety of software and programming languages , ranging from Java to Microsoft Excel .", |
|
"entity_list": [ |
|
{ |
|
"name": "Phidgets", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Microsoft Excel", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The term machine learning was coined in 1959 by Arthur Samuel , an American IBMer and pioneer in the field of computer gaming and artificial intelligence .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Arthur Samuel", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "American IBMer", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "computer gaming", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Israeli poet David Avidan , who was fascinated with future technologies and their relation to art , desired to explore the use of computers for writing literature .", |
|
"entity_list": [ |
|
{ |
|
"name": "Israeli", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "David Avidan", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As part of the GATEway Project in 2017 , Oxbotica trialled seven autonomous shuttle buses in Greenwich , navigating a two-mile riverside path near London 's The O2 Arena on a route also used by pedestrians and cyclists .", |
|
"entity_list": [ |
|
{ |
|
"name": "GATEway Project", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Oxbotica", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Greenwich", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "London", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "The O2 Arena", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An unrelated but commonly used combination of basic statistics from information retrieval is the F-score , being a ( possibly weighted ) harmonic mean of recall and precision where recall = sensitivity = TRUE positive rate , but specificity and precision are totally different measures .", |
|
"entity_list": [ |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "F-score", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "harmonic mean", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "specificity", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Neuromorphic engineering is an interdisciplinary subject that takes inspiration from biology , physics , mathematics , computer science , and electronic engineering to design artificial neural systems , such as vision systems , head-eye systems , auditory processors , and autonomous robots , whose physical architecture and design principles are based on those of biological nervous systems .", |
|
"entity_list": [ |
|
{ |
|
"name": "Neuromorphic engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "biology", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "physics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "mathematics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "electronic engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "vision systems", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "head-eye systems", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "auditory processors", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "autonomous robots", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "biological nervous systems", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "To be specific , the BIBO stability criterion requires that the ROC of the system includes the unit circle .", |
|
"entity_list": [ |
|
{ |
|
"name": "BIBO stability criterion", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "ROC", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "2 The program was rewritten in Java beginning in 1998 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The MCC can be calculated directly from the confusion matrix using the formula :", |
|
"entity_list": [ |
|
{ |
|
"name": "MCC", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It was developed by a team at the MIT-IBM Watson AI Lab and first presented at the 2018 International Conference on Learning Representations .", |
|
"entity_list": [ |
|
{ |
|
"name": "MIT-IBM Watson AI Lab", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "2018 International Conference on Learning Representations", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "When the TRUE prevalence s for the two positive variables are equal as assumed in Fleiss kappa and F-score , that is the number of positive predictions matches the number of positive classes in the dichotomous ( two class ) case , the different kappa and correlation measure collapse to identity with Youden 's J , and recall , precision and F-score are similarly identical with accuracy .", |
|
"entity_list": [ |
|
{ |
|
"name": "TRUE prevalence", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Fleiss kappa", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "F-score", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "kappa", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "correlation", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Youden 's J", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Building Educational Applications workshop ( BEA ) at NAACL 2013 hosted the inaugural NLI shared task. Tetreault et al , 2013 The competition resulted in 29 entries from teams across the globe , 24 of which also published a paper describing their systems and approaches .", |
|
"entity_list": [ |
|
{ |
|
"name": "Building Educational Applications workshop", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "BEA", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "NAACL", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "NLI shared task.", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Tetreault", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Viterbi algorithm is a dynamic programming algorithm for finding the most likely sequence of hidden states called the Viterbi path that results in a sequence of observed events , especially in the context of Markov information source s and hidden Markov model s ( HMM ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Viterbi algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "dynamic programming algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "hidden states", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Viterbi path", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Markov information source", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "HMM", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In statistics , multinomial logistic regression is a classification method that generalizes logistic regression to multiclass classification , i.e. with more than two possible discrete outcomes .", |
|
"entity_list": [ |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "multinomial logistic regression", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "classification method", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "logistic regression", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "multiclass classification", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Hidden Markov models are known for their applications to reinforcement learning and temporal pattern recognition such as speech , handwriting recognition , gesture recognition , Thad Starner , Alex Pentland .", |
|
"entity_list": [ |
|
{ |
|
"name": "Hidden Markov models", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "reinforcement learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "temporal pattern recognition", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "speech", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "handwriting recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "gesture recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Thad Starner", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Alex Pentland", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Essentially , this means that if the n-gram has been seen more than k times in training , the conditional probability of a word given its history is proportional to the maximum likelihood estimate of that n -gram .", |
|
"entity_list": [ |
|
{ |
|
"name": "n-gram", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "maximum likelihood estimate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "n -gram", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He is interested in knowledge representation , commonsense reasoning , and natural language understanding , believing that deep language understanding can only currently be achieved by significant hand-engineering of semantically-rich formalisms coupled with statistical preferences .", |
|
"entity_list": [ |
|
{ |
|
"name": "knowledge representation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "commonsense reasoning", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "natural language understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "deep language understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "hand-engineering", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In JavaScript , Python or", |
|
"entity_list": [ |
|
{ |
|
"name": "JavaScript", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Newcomb Awards are announced in the AI Magazine published by AAAI .", |
|
"entity_list": [ |
|
{ |
|
"name": "Newcomb Awards", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "AI Magazine", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "AAAI", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Mean squared error on a test set of 100 exemplars is 0.084 , smaller than the unnormalized error .", |
|
"entity_list": [ |
|
{ |
|
"name": "Mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The F-score has been widely used in the natural language processing literature , such as the evaluation of named entity recognition ( NER ) and word segmentation .", |
|
"entity_list": [ |
|
{ |
|
"name": "F-score", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "named entity recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "NER", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "word segmentation", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Chatbots are typically used in dialog systems for various purposes including customer service , request routing , or for information gathering .", |
|
"entity_list": [ |
|
{ |
|
"name": "Chatbots", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "dialog systems", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "request routing", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "information gathering", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Important journals include the IEEE Transactions on Speech and Audio Processing ( later renamed IEEE Transactions on Audio , Speech and Language Processing and since Sept 2014 renamed IEEE / ACM Transactions on Audio , Speech and Language Processing - after merging with an ACM publication ) , Computer Speech and Language , and Speech Communication .", |
|
"entity_list": [ |
|
{ |
|
"name": "IEEE Transactions on Speech and Audio Processing", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IEEE Transactions on Audio , Speech and Language Processing", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IEEE / ACM Transactions on Audio , Speech and Language Processing", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ACM", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Computer Speech and Language", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Speech Communication", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "EM is frequently used for data clustering in machine learning and computer vision .", |
|
"entity_list": [ |
|
{ |
|
"name": "EM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "data clustering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "While there is no perfect way of describing the confusion matrix of TRUE and FALSE positives and negatives by a single number , the Matthews correlation coefficient is generally regarded as being one of the best such measures .", |
|
"entity_list": [ |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Matthews correlation coefficient", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As data set s have grown in size and complexity , direct hands-on data analysis has been augmented with indirect , automated data processing , aided by other discoveries in computer science , specially in the field of machine learning , such as neural networks , cluster analysis , genetic algorithms ( 1950s ) , decision tree learning and decision rules ( 1960s ) , and support vector machines ( 1990s ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "data analysis", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "neural networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "cluster analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "genetic algorithms", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "decision tree learning", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "decision rules", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "support vector machines", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the fall of 2005 , Thrun published a textbook entitled Probabilistic Robotics together with his long-term co-workers Dieter Fox and Wolfram Burgard .", |
|
"entity_list": [ |
|
{ |
|
"name": "Thrun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Probabilistic Robotics", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Dieter Fox", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Wolfram Burgard", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "John D. Lafferty , Andrew McCallum and Pereiramath as follows :", |
|
"entity_list": [ |
|
{ |
|
"name": "John D. Lafferty", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Andrew McCallum", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Pereiramath", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Question answering ( QA ) is a computer science discipline within the fields of information retrieval and natural language processing ( NLP ) , which is concerned with building systems that automatically answer questions posed by humans in a natural language .", |
|
"entity_list": [ |
|
{ |
|
"name": "Question answering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "QA", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "information retrieval", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "NLP", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "However , in the version of the metric used by NIST evaluations prior to 2009 , the shortest reference sentence had been used instead .", |
|
"entity_list": [ |
|
{ |
|
"name": "NIST", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "On August 27 , 2018 , Toyota announced an investment of $ 500 Million in Uber ' s autonomous car s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Toyota", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Uber", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "autonomous car", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The sample maximum is the maximum likelihood estimator for the population maximum , but , as discussed above , it is biased .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood estimator", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "LSI helps overcome synonymy by increasing recall , one of the most problematic constraints of Boolean keyword queries and vector space models .", |
|
"entity_list": [ |
|
{ |
|
"name": "LSI", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "synonymy", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Boolean keyword queries", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "vector space models", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Data acquisition applications are usually controlled by software programs developed using various general purpose programming languages such as Assembly , BASIC , C , C + + , C # , Fortran , Java , LabVIEW , Lisp , Pascal , etc .", |
|
"entity_list": [ |
|
{ |
|
"name": "Data acquisition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Assembly", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "BASIC", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C + +", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C #", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Fortran", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "LabVIEW", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Lisp", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Pascal", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2003 , Honda released its Cog advertisement in the UK and on the Internet .", |
|
"entity_list": [ |
|
{ |
|
"name": "Honda", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Cog", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "UK", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Association for Computational Linguistics defines computational linguistics as :", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for Computational Linguistics", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "computational linguistics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Expectation-maximization algorithm s may be employed to calculate approximate maximum likelihood estimates of unknown state-space parameters within minimum-variance filters and smoothers .", |
|
"entity_list": [ |
|
{ |
|
"name": "Expectation-maximization algorithm", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "maximum likelihood estimates", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Correspondents included former Baywatch actresses Donna D 'Errico , Carmen Electra , and Traci Bingham , former Playboy Playmate Heidi Mark , comedian Arj Barker and identical twins Randy and Jason Sklar .", |
|
"entity_list": [ |
|
{ |
|
"name": "Baywatch", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Donna D 'Errico", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Carmen Electra", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Traci Bingham", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Playboy Playmate", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Heidi Mark", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Arj Barker", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Randy", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Jason Sklar", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It is commonly used to generate representations for speech recognition ( ASR ) , e.g. the CMU Sphinx system , and speech synthesis ( TTS ) , e.g. the Festival system .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "ASR", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "CMU Sphinx system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "speech synthesis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "TTS", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Festival system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Sensitivity or TRUE Positive Rate ( TPR ) , also known as recall , is the proportion of people that tested positive and are positive ( TRUE Positive , TP ) of all the people that actually are positive ( Condition Positive , CP = TP + FN ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE Positive Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TPR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE Positive", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TP", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Condition Positive", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "CP", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TP + FN", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Popular speech recognition conferences held each year or two include SpeechTEK and SpeechTEK Europe , ICASSP , Interspeech / Eurospeech , and the IEEE ASRU .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "SpeechTEK", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "SpeechTEK Europe", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ICASSP", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Interspeech", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Eurospeech", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IEEE ASRU", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Devol collaborated with Engelberger , who served as president of the company , to engineer and produce an industrial robot under the brand name Unimate .", |
|
"entity_list": [ |
|
{ |
|
"name": "Devol", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Engelberger", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Unimate", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A Hidden Markov model ( HMM ) is a statistical Markov model in which the system being modeled is assumed to be a Markov process with unobserved ( hidden ) states .", |
|
"entity_list": [ |
|
{ |
|
"name": "Hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "HMM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "statistical Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Markov process", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This property , undesirable in many applications , has led researchers to use alternatives such as the mean absolute error , or those based on the median .", |
|
"entity_list": [ |
|
{ |
|
"name": "mean absolute error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "median", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Such a sequence ( which depends on the outcome of the investigation of previous attributes at each stage ) is called a decision tree and applied in the area of machine learning known as decision tree learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "decision tree", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "decision tree learning", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As in factor analysis , the LCA can also be used to classify case according to their maximum likelihood class membership .", |
|
"entity_list": [ |
|
{ |
|
"name": "factor analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "LCA", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "maximum likelihood", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Supervised neural networks that use a mean squared error ( MSE ) cost function can use formal statistical methods to determine the confidence of the trained model .", |
|
"entity_list": [ |
|
{ |
|
"name": "Supervised neural networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "MSE", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "cost function", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This can be directly expressed as a linear program , but it is also equivalent to Tikhonov regularization with the hinge loss function , mathV ( f ( x ) , y ) = \\ max ( 0 , 1 - yf ( x ) ) / math :", |
|
"entity_list": [ |
|
{ |
|
"name": "Tikhonov regularization", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "hinge loss function", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The following technique was described in Breiman 's original paper and is implemented in the R package randomForest .", |
|
"entity_list": [ |
|
{ |
|
"name": "Breiman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "R package randomForest", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Traditional image quality measures , such as PSNR , are typically performed on fixed resolution images and do not take into account some aspects of the human visual system , like the change in spatial resolution across the retina .", |
|
"entity_list": [ |
|
{ |
|
"name": "PSNR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "retina", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "John Ireland , Joanne Dru and Macdonald Carey starred in the Jack Broder color production Hannah Lee , which premiered June 19 , 1953 .", |
|
"entity_list": [ |
|
{ |
|
"name": "John Ireland", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Joanne Dru", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Macdonald Carey", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Jack Broder", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Hannah Lee", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "That process is called image registration , and uses different methods of computer vision , mostly related to tracking .", |
|
"entity_list": [ |
|
{ |
|
"name": "image registration", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "tracking", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Now let us start explaining the different possible relations between predicted and actual outcome : Confusion matrix", |
|
"entity_list": [] |
|
}, |
|
{ |
|
"sentence": "The VOICEBOX speech processing toolbox for MATLAB implements the conversion and its inverse as :", |
|
"entity_list": [ |
|
{ |
|
"name": "VOICEBOX", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "speech processing toolbox", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Prolog is a logic programming language associated with artificial intelligence and computational linguistics .", |
|
"entity_list": [ |
|
{ |
|
"name": "Prolog", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computational linguistics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Milner has received numerous awards for her contributions to neuroscience and psychology including memberships in the Royal Society of London , the Royal Society of Canada and the National Academy of Sciences .", |
|
"entity_list": [ |
|
{ |
|
"name": "Milner", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "neuroscience", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "psychology", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Royal Society of London", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Royal Society of Canada", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "National Academy of Sciences", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "By combining these operators one can obtain algorithms for many image processing tasks , such as feature extraction , image segmentation , image sharpening , image filtering , and classification .", |
|
"entity_list": [ |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "feature extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "image segmentation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "image sharpening", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "image filtering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "classification", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As of 2017 , he is a professor at the Collège de France and , since 1989 , the director of INSERM Unit 562 , Cognitive Neuroimaging .", |
|
"entity_list": [ |
|
{ |
|
"name": "Collège de France", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "INSERM Unit 562", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Cognitive Neuroimaging", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "There are many approaches to learning these embeddings , notably using Bayesian clustering frameworks or energy-based frameworks , and more recently , TransE ( Conference on Neural Information Processing Systems 2013 ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bayesian clustering frameworks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "energy-based frameworks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "TransE", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Conference on Neural Information Processing Systems 2013", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It is an alternative to the Word error rate ( Word Error Rate ) used in several countries .", |
|
"entity_list": [ |
|
{ |
|
"name": "Word error rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Word Error Rate", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "ANNs have been used on a variety of tasks , including computer vision , speech recognition , machine translation , social network filtering , playing board and video games , medical diagnosis , and even in activities that have traditionally been considered as reserved to humans , like painting .", |
|
"entity_list": [ |
|
{ |
|
"name": "ANNs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "social network filtering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "playing board and video games", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "medical diagnosis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "painting", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Modular Audio Recognition Framework ( MARF ) is an open-source research platform and a collection of voice , sound , speech , text and natural language processing ( NLP ) algorithm s written in Java and arranged into a modular and extensible framework that attempts to facilitate addition of new algorithm s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Modular Audio Recognition Framework", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "MARF", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "NLP", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2018 , a report by the civil liberties and rights campaigning organisation Big Brother Watch revealed that two United Kingdom police forces , South Wales Police and the Metropolitan Police , were using live facial recognition at public events and in public spaces , in September 2019 , South Wales Police use of facial recognition was ruled lawful .", |
|
"entity_list": [ |
|
{ |
|
"name": "Big Brother Watch", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "United Kingdom", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "South Wales Police", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Metropolitan Police", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "facial recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "ANIMAL has been ported to R , a freely available language and environment for statistical computing and graphics .", |
|
"entity_list": [ |
|
{ |
|
"name": "ANIMAL", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "statistical computing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "graphics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Time-inhomogeneous hidden Bernoulli model ( TI-HBM ) is an alternative to hidden Markov model ( HMM ) for automatic speech recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "Time-inhomogeneous hidden Bernoulli model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "TI-HBM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "HMM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "automatic speech recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In July 2016 , Nvidia demonstrated during SIGGRAPH a new method of foveated rendering claimed to be invisible to users .", |
|
"entity_list": [ |
|
{ |
|
"name": "Nvidia", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "SIGGRAPH", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Both rely on speech act theory developed by John Searle in the 1960s and enhanced by Terry Winograd and Flores in the 1970s .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech act theory", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "John Searle", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Terry Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Flores", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Neural network models of concept formation and the structure of knowledge have opened powerful hierarchical models of knowledge organization such as George Miller ' s Wordnet .", |
|
"entity_list": [ |
|
{ |
|
"name": "Neural network models", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "George Miller", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Wordnet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Template matching has various applications and is used in such fields as face recognition ( see facial recognition system ) and medical image processing .", |
|
"entity_list": [ |
|
{ |
|
"name": "Template matching", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "face recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "facial recognition system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "medical image processing", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "However , usage only became widespread in 2005 when Navneet Dalal and Bill Triggs , researchers for the French National Institute for Research in Computer Science and Automation ( INRIA ) , presented their supplementary work on HOG descriptors at the Conference on Computer Vision and Pattern Recognition ( CVPR ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Navneet Dalal", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Bill Triggs", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "French National Institute for Research in Computer Science and Automation", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "INRIA", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "HOG descriptors", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Conference on Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Prior to joining the Penn faculty in 2002 , he spent a decade ( 1991-2001 ) in AT & T Labs and Bell Labs , including as head of the AI department with colleagues including Michael L. Littman , David A. McAllester , and Richard S. Sutton ; Secure Systems Research department ; and Machine Learning department with members such as Michael Collins and the leader ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Penn", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "AT & T Labs", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Bell Labs", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "AI", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Michael L. Littman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "David A. McAllester", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Richard S. Sutton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Secure Systems Research department", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Machine Learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Michael Collins", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "When data are unlabelled , supervised learning is not possible , and an unsupervised learning approach is required which attempts to find natural Cluster analysis to groups , and then map new data to these formed groups .", |
|
"entity_list": [ |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "unsupervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Cluster analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This field of computer science developed in the 1950s at academic institutions such as the MIT A.I. Lab , originally as a branch of artificial intelligence and robotics .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "MIT A.I. Lab", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "robotics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It could also be replaced by the Log loss equation below :", |
|
"entity_list": [ |
|
{ |
|
"name": "Log loss", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Shirley Ryan AbilityLab ( formerly the Rehabilitation Institute of Chicago ) , University of California at Berkeley , MIT , Stanford University , and University of Twente in the Netherlands are the researching leaders in biomechatronics .", |
|
"entity_list": [ |
|
{ |
|
"name": "Shirley Ryan AbilityLab", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Rehabilitation Institute of Chicago", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "University of California at Berkeley", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "MIT", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Stanford University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "University of Twente", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Netherlands", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "biomechatronics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Given a set of predicted values and a corresponding set of actual values for X for various time periods , a common evaluation technique is to use the mean squared prediction error ; other measures are also available ( see forecasting # forecasting accuracy ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "mean squared prediction error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "forecasting accuracy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other measures , such as the proportion of correct predictions ( also termed accuracy ) , are not useful when the two classes are of very different sizes .", |
|
"entity_list": [ |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The first alpha version of OpenCV was released to the public at the Conference on Computer Vision and Pattern Recognition in 2000 , and five betas were released between 2001 and 2005 .", |
|
"entity_list": [ |
|
{ |
|
"name": "OpenCV", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Results have been presented which give correlation of up to 0.964 with human judgement at the corpus level , compared to BLEU ' s achievement of 0.817 on the same data set .", |
|
"entity_list": [ |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An early version of VMAF has been shown to outperform other image and video quality metrics such as SSIM , PSNR -HVS and VQM-VFD on three of four datasets in terms of prediction accuracy , when compared to subjective ratings .", |
|
"entity_list": [ |
|
{ |
|
"name": "VMAF", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "SSIM", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "PSNR -HVS", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "VQM-VFD", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For example , the ambiguity of ' mouse ' ( animal or device ) is not relevant in machine translation , but is relevant in information retrieval .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Geometric hashing was originally suggested in computer vision for object recognition in 2D and 3D ,", |
|
"entity_list": [ |
|
{ |
|
"name": "Geometric hashing", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "object recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It forms one of the three main categories of machine learning , along with supervised learning and reinforcement learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "reinforcement learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Reinforcement learning , due to its generality , is studied in many other disciplines , such as game , control theory , operations research , information theory , simulation-based optimization , multi-agent systems , swarm intelligence , statistics and genetic algorithm s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Reinforcement learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "game", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "control theory", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "operations research", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "information theory", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "simulation-based optimization", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "multi-agent systems", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "swarm intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "genetic algorithm", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Pattern recognition is closely related to artificial intelligence and machine learning ,", |
|
"entity_list": [ |
|
{ |
|
"name": "Pattern recognition", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The software is used to design , train and deploy neural network ( supervised learning and unsupervised learning ) models to perform a wide variety of tasks such as data mining , classification , function approximation , multivariate regression and time-series prediction .", |
|
"entity_list": [ |
|
{ |
|
"name": "neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "unsupervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "data mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "function approximation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "multivariate regression", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "time-series prediction", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2016 , he was elected Fellow of Association for the Advancement of Artificial Intelligence .", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for the Advancement of Artificial Intelligence", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "She serves as a member of the National Academy of Sciences ( since 2005 ) , American Academy of Arts and Sciences ( since 2009 ) ,", |
|
"entity_list": [ |
|
{ |
|
"name": "National Academy of Sciences", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "American Academy of Arts and Sciences", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "During the 1973 Yom Kippur War , Soviet-supplied surface-to-air missile batteries in Egypt and Syria caused heavy damage Israeli fighter jet s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Yom Kippur War", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "surface-to-air missile", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Egypt", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Syria", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Israeli", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Another resource ( free but copyrighted ) is the HTK book ( and the accompanying HTK toolkit ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "HTK book", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "HTK toolkit", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "- were taken in the 2004 AAAI Spring Symposium where linguists , computer scientists , and other interested researchers first aligned interests and proposed shared tasks and benchmark data sets for the systematic computational research on affect , appeal , subjectivity , and sentiment in text .", |
|
"entity_list": [ |
|
{ |
|
"name": "2004 AAAI", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A single grid can be analysed for both content ( eyeball inspection ) and structure ( cluster analysis , principal component analysis , and a variety of structural indices relating to the complexity and range of the ratings being the chief techniques used ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "eyeball inspection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "cluster analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "principal component analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2018 Toyota was regarded as being behind in Self-driving car and in need of innovation .", |
|
"entity_list": [ |
|
{ |
|
"name": "Toyota", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Self-driving car", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Such targets include natural objects such as ground , sea , precipitation ( such as rain , snow or hail ) , sand storm s , animals ( especially birds ) , atmospheric turbulence , and other atmospheric effects , such as ionosphere reflections , meteor trails , and three body scatter spike .", |
|
"entity_list": [ |
|
{ |
|
"name": "ionosphere reflections", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "meteor trails", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "three body scatter spike", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In planning and control , the essential difference between humanoids and other kinds of robots ( like industrial ones ) is that the movement of the robot must be human-like , using legged locomotion , especially biped gait .", |
|
"entity_list": [ |
|
{ |
|
"name": "industrial", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "biped gait", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The gradient descent can take many iterations to compute a local minimum with a required accuracy , if the curvature in different directions is very different for the given function .", |
|
"entity_list": [ |
|
{ |
|
"name": "gradient descent", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "local minimum", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "curvature", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The 1997 RoboCup 2D Soccer Simulation League was the first RoboCup competition promoted in conjunction with International Joint Conference on Artificial Intelligence held in Nagoya , Japan , from 23 to 29 August 1997 .", |
|
"entity_list": [ |
|
{ |
|
"name": "1997 RoboCup 2D Soccer Simulation League", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "RoboCup", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "International Joint Conference on Artificial Intelligence", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Nagoya", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Japan", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other programming options include an embedded Python environment , and an R Console plus support for Rserve .", |
|
"entity_list": [ |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Rserve", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "From Bonn he has contributed fundamentally to artificial intelligence and robotics ( with Wolfram Burgard , Dieter Fox , Sebastian Thrun among his students ) , and to the development of software engineering , particularly in civil engineering , and information systems , particularly in the geosciences. won the AAAI Classic Paper award of 2016.2014 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bonn", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "robotics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Wolfram Burgard", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Dieter Fox", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Sebastian Thrun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "software engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "civil engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "information systems", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "geosciences.", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "AAAI Classic Paper award", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The first USA edition of Campus Party will take place from 20 to 22 of August at TCF Center in Detroit , Michigan .", |
|
"entity_list": [ |
|
{ |
|
"name": "USA edition of Campus Party", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "TCF Center", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Detroit", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Michigan", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Together with Yann LeCun , and Yoshua Bengio , Hinton won the 2018 Turing Award for conceptual and engineering breakthroughs that have made deep neural networks a critical component of computing .", |
|
"entity_list": [ |
|
{ |
|
"name": "Yann LeCun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Yoshua Bengio", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Hinton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Turing Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "deep neural networks", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Euler Math Toolbox uses a matrix language similar to MATLAB , a system that had been under development since the 1970s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Euler Math Toolbox", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Some languages make it possible portably ( e.g. Scheme , Common Lisp , Perl or D ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Scheme", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Common Lisp", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Perl", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "D", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1969 a famous book entitled Perceptrons by Marvin Minsky and Seymour Papert showed that it was impossible for these classes of network to learn an XOR function .", |
|
"entity_list": [ |
|
{ |
|
"name": "Perceptrons", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Marvin Minsky", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Seymour Papert", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "XOR function", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Large numbers of Russian scientific and technical documents were translated using SYSTRAN under the auspices of the USAF Foreign Technology Division ( later the National Air and Space Intelligence Center ) at Wright-Patterson Air Force Base , Ohio .", |
|
"entity_list": [ |
|
{ |
|
"name": "Russian", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "SYSTRAN", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "USAF Foreign Technology Division", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "National Air and Space Intelligence Center", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Wright-Patterson Air Force Base", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Ohio", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Semi-supervised learning falls between unsupervised learning ( without any labeled training data ) and supervised learning ( with completely labeled training data ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Semi-supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "unsupervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An n -gram model is a type of probabilistic language model for predicting the next item in such a sequence in the form of a ( n − 1 ) -order Markov model .efficiently .", |
|
"entity_list": [ |
|
{ |
|
"name": "n -gram model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "probabilistic language model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Markov model", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Cleveland Clinic has used Cyc to develop a natural language query interface of biomedical information , spanning decades of information on cardiothoracic surgeries .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cleveland Clinic", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Cyc", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "natural language query interface of biomedical information", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The incident strained relations between the United States and Japan , and resulted in the arrest and prosecution two senior executives , as well as the imposition of sanctions on the company by both countries .", |
|
"entity_list": [ |
|
{ |
|
"name": "United States", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Japan", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "If the modeling is done by an artificial neural network or other machine learning , the optimization of parameters is called training , while the optimization of model hyperparameters is called tuning and often uses cross-validation ..", |
|
"entity_list": [ |
|
{ |
|
"name": "artificial neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "training", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "tuning", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "cross-validation", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Localized versions of the site available in the United Kingdom , India , and Australia were discontinued following the acquisition of Rotten Tomatoes by Fandango .", |
|
"entity_list": [ |
|
{ |
|
"name": "United Kingdom", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "India", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Australia", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Rotten Tomatoes", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Fandango", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The NER model is one of a number of methods for determining the accuracy of live subtitles in television broadcasts and events that are produced using speech recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "NER", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Atran has taught at Cambridge University , Hebrew University in Jerusalem , the École pratique des hautes études and École Polytechnique in Paris , and John Jay College of Criminal Justice in New York City .", |
|
"entity_list": [ |
|
{ |
|
"name": "Atran", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Cambridge University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Hebrew University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Jerusalem", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "École pratique des hautes études", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "École Polytechnique", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Paris", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "John Jay College of Criminal Justice", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "New York City", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "SHRDLU was an early natural language understanding computer program , developed by Terry Winograd at MIT in 1968-1970", |
|
"entity_list": [ |
|
{ |
|
"name": "SHRDLU", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "natural language understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Terry Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "MIT", |
|
"type": "university" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He received a B.E. in electronics engineering from B.M.S. College of Engineering in Bangalore , India in 1982 , when it was affiliated with Bangalore University , an M.S. in electrical and computer engineering in 1984 from Drexel University , and an M.S. in computer science in 1989 , and a Ph.D. in 1990 , respectively , from the University of Wisconsin-Madison , where he studied Artificial Intelligence and worked with Leonard Uhr .", |
|
"entity_list": [ |
|
{ |
|
"name": "B.E.", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "electronics engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "B.M.S. College of Engineering", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Bangalore", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "India", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Bangalore University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "M.S.", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "electrical and computer engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Drexel University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Ph.D.", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "University of Wisconsin-Madison", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Artificial Intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Leonard Uhr", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Accuracy is usually rated with word error rate ( WER ) , whereas speed is measured with the real time factor .", |
|
"entity_list": [ |
|
{ |
|
"name": "word error rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "WER", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "real time factor", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1971 Terry Winograd developed an early natural language processing engine capable of interpreting naturally written commands within a simple rule-governed environment .", |
|
"entity_list": [ |
|
{ |
|
"name": "Terry Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In artificial intelligence , Marvin Minsky , Herbert A. Simon , and Allen Newell are prominent .", |
|
"entity_list": [ |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Marvin Minsky", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Herbert A. Simon", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Allen Newell", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the latter half of the 20th century , electrical engineering itself separated into several disciplines , specialising in the design and analysis of systems that manipulate physical signals ; electronic engineering and computer engineering as examples ; while design engineering developed to deal with functional design of user-machine interfaces .", |
|
"entity_list": [ |
|
{ |
|
"name": "electrical engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "electronic engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "design engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "user-machine interfaces", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Perhaps the simplest statistic is accuracy or Fraction Correct ( FC ) , which measures the fraction of all instances that are correctly categorized ; it is the ratio of the number of correct classifications to the total number of correct or incorrect classifications : ( TP + TN ) / Total Population = ( TP + TN ) / ( TP + TN + FP + FN ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Fraction Correct", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FC", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TP + TN", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TP + TN + FP + FN", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the academic community , the major forums for research started in 1995 when the First International Conference Data Mining and Knowledge Discovery ( KDD-95 ) was started in Montreal under AAAI sponsorship .", |
|
"entity_list": [ |
|
{ |
|
"name": "First International Conference Data Mining and Knowledge Discovery", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "KDD-95", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Montreal", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "AAAI", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In this approach , models are developed using different data mining , machine learning algorithms to predict users ' rating of unrated items .", |
|
"entity_list": [ |
|
{ |
|
"name": "data mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In light of the above discussion , we see that the SVM technique is equivalent to empirical risk with Tikhonov regularization , where in this case the loss function is the hinge loss", |
|
"entity_list": [ |
|
{ |
|
"name": "SVM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "empirical risk", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Tikhonov regularization", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "loss function", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The 2015 edition was hosted by Molly McGrath , with Chris Rose and former UFC fighter Kenny Florian as commentators .", |
|
"entity_list": [ |
|
{ |
|
"name": "Molly McGrath", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Chris Rose", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "UFC", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Kenny Florian", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A subset called Micro-Planner was implemented by Gerald Jay Sussman , Eugene Charniak and Terry Winograd Sussman , , and Winograd 1971 and was used in Winograd 's natural-language understanding program SHRDLU , Eugene Charniak 's story understanding work , Thorne McCarty 's work on legal reasoning , and some other projects .", |
|
"entity_list": [ |
|
{ |
|
"name": "Micro-Planner", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Gerald Jay Sussman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Eugene Charniak", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Terry Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Sussman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "natural-language understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "SHRDLU", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "story understanding", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Thorne McCarty", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "legal reasoning", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "WordNet has been used for a number of purposes in information systems , including word-sense disambiguation , information retrieval , automatic text classification , Automatic summarization , machine translation and even automatic crossword puzzle generation .", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "information systems", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "word-sense disambiguation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "automatic text classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Automatic summarization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "automatic crossword puzzle generation", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Keutzer was named a Fellow of the IEEE in 1996 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Keutzer", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "IEEE", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A widely used type of composition is the nonlinear weighted sum , where math \\ textstyle f ( x ) = K \\ left ( \\ sum _ i w _ i g _ i ( x ) \\ right ) / math , where math \\ textstyle K / math ( commonly referred to as the activation function ) is some predefined function , such as the hyperbolic tangent , sigmoid function , softmax function , or rectifier function .", |
|
"entity_list": [ |
|
{ |
|
"name": "nonlinear weighted sum", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "activation function", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "hyperbolic tangent", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "sigmoid function", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "softmax function", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "rectifier function", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the film Westworld , female robots actually engaged in intercourse with human men as part of the make-believe vacation world human customers paid to attend .", |
|
"entity_list": [ |
|
{ |
|
"name": "Westworld", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Typically , the process starts by terminology extraction and concepts or noun phrase s from plain text using linguistic processors such as part-of-speech tagging and phrase chunking .", |
|
"entity_list": [ |
|
{ |
|
"name": "terminology extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "part-of-speech tagging", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "phrase chunking", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "They demonstrated its performance on a number of problems of interest to the machine learning community , including handwriting recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "handwriting recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "While studying at Stanford , Scheinman was awarded a fellowship sponsored by George Devol , the inventor of the Unimate , the first industrial robot .", |
|
"entity_list": [ |
|
{ |
|
"name": "Stanford", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Scheinman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "George Devol", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Unimate", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "While originally used to evaluate machine translations , bilingual evaluation understudy ( BLEU ) has been used successfully to evaluate paraphrase generation models as well .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine translations", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "bilingual evaluation understudy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "paraphrase generation models", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Unimation later licensed their technology to Kawasaki Heavy Industries and GKN , manufacturing Unimate s in Japan and England respectively .", |
|
"entity_list": [ |
|
{ |
|
"name": "Unimation", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Kawasaki Heavy Industries", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "GKN", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Unimate", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Japan", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "England", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Much of the confusion between these two research communities ( which do often have separate conferences and separate journals , ECML PKDD being a major exception ) comes from the basic assumptions they work with : in machine learning , performance is usually evaluated with respect to the ability to reproduce known knowledge , while in knowledge discovery and data mining ( KDD ) the key task is the discovery of previously unknown knowledge .", |
|
"entity_list": [ |
|
{ |
|
"name": "ECML PKDD", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "knowledge discovery and data mining", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "KDD", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Hidden Markov model s are the basis for most modern automatic speech recognition systems .", |
|
"entity_list": [ |
|
{ |
|
"name": "Hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "automatic speech recognition systems", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": ", a company in Bangalore , India specializing in online handwriting recognition software .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bangalore", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "India", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "handwriting recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Do repeated translations converge on a single expression in both languages ? I.e. does the translation method show stationarity or produce a canonical form ? Does the translation become stationary without losing the original meaning ? This metric has been criticized as not being well correlated with BLEU ( BiLingual Evaluation Understudy ) scores .", |
|
"entity_list": [ |
|
{ |
|
"name": "canonical form", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "BiLingual Evaluation Understudy", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He holds fellowships in the American Association for Artificial Intelligence , the Center for Advanced Study in the Behavioral Sciences at Stanford University , the MIT Center for Cognitive Science , the Canadian Institute for Advanced Research , the Canadian Psychological Association , and was elected Fellow of the Royal Society of Canada in 1998 .", |
|
"entity_list": [ |
|
{ |
|
"name": "American Association for Artificial Intelligence", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Center for Advanced Study in the Behavioral Sciences", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Stanford University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "MIT", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Cognitive Science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Canadian Institute for Advanced Research", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Canadian Psychological Association", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Royal Society of Canada", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Hinton - together with Yoshua Bengio and Yann LeCun - are referred to by some as the Godfathers of AI and Godfathers of Deep Learning .", |
|
"entity_list": [ |
|
{ |
|
"name": "Hinton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Yoshua Bengio", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Yann LeCun", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Godfathers of AI", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Godfathers of Deep Learning", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The lightweight open-source speech project eSpeak , which has its own approach to synthesis , has experimented with Mandarin and Cantonese. eSpeak was used by Google Translate from May 20102010 .", |
|
"entity_list": [ |
|
{ |
|
"name": "eSpeak", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Mandarin", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Cantonese.", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Google Translate", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Also released in 1982 , Software Automatic Mouth was the first commercial all-software voice synthesis program .", |
|
"entity_list": [ |
|
{ |
|
"name": "Software Automatic Mouth", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "synthesis program", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The column ratios are TRUE Positive Rate ( TPR , aka Sensitivity or recall ) ( TP / ( TP + FN ) ) , with complement the FALSE Negative Rate ( FNR ) ( FN / ( TP + FN ) ) ; and TRUE Negative Rate ( TNR , aka Specificity , SPC ) ( TN / ( TN + FP ) ) , with complement FALSE Positive Rate ( FPR ) ( FP / ( TN + FP ) ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "TRUE Positive Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TPR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TP / ( TP + FN )", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE Negative Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FNR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FN / ( TP + FN )", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE Negative Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TNR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Specificity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "SPC", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TN / ( TN + FP )", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE Positive Rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FPR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FP / ( TN + FP )", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Edsinger and Weber collaborated on many other robots as well , and their experience working with the Kismet", |
|
"entity_list": [ |
|
{ |
|
"name": "Edsinger", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Weber", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "R functionality is accessible from several scripting languages such as Python , are available as well .", |
|
"entity_list": [ |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "VAL was one of the first robot languages and was used in Unimate robots .", |
|
"entity_list": [ |
|
{ |
|
"name": "VAL", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Unimate robots", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "They presented their database for the first time as a poster at the 2009 Conference on Computer Vision and Pattern Recognition ( CVPR ) in Florida .", |
|
"entity_list": [ |
|
{ |
|
"name": "2009 Conference on Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Florida", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Categorization tasks in which no labels are supplied are referred to as unsupervised classification , unsupervised learning , Cluster analysis .", |
|
"entity_list": [ |
|
{ |
|
"name": "Categorization tasks", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "unsupervised classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "unsupervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Cluster analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It needs to Object recognition , recognize and locate humans and further emotion recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "Object recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "emotion recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The process is complex and contains encoding and recall or retrieval .", |
|
"entity_list": [ |
|
{ |
|
"name": "encoding", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "retrieval", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Also known as parallel robots , or generalized Stewart platforms ( in the Stewart platform , the actuators are paired together on both the basis and the platform ) , these systems are articulated robot s that use similar mechanisms for the movement of either the robot on its base , or one or more manipulator arms .", |
|
"entity_list": [ |
|
{ |
|
"name": "Stewart platforms", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Stewart platform", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "articulated robot", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Machine vision as a systems engineering discipline can be considered distinct from computer vision , a form of computer science .", |
|
"entity_list": [ |
|
{ |
|
"name": "Machine vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "systems engineering", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The activation function of the LSTM gates is often the logistic sigmoid function .", |
|
"entity_list": [ |
|
{ |
|
"name": "LSTM gates", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "logistic sigmoid function", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In other words , the sample mean is the ( necessarily unique ) efficient estimator , and thus also the minimum variance unbiased estimator ( MVUE ) , in addition to being the maximum likelihood estimator .", |
|
"entity_list": [ |
|
{ |
|
"name": "sample mean", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "minimum variance unbiased estimator", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "MVUE", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "maximum likelihood estimator", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The 2001 Scientific American article by Berners-Lee , James Hendler , and Ora Lassila described an expected evolution of the existing Web to a Semantic Web .", |
|
"entity_list": [ |
|
{ |
|
"name": "Scientific American", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Berners-Lee", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "James Hendler", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Ora Lassila", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Web", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Semantic Web", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Blade Runner used a number of then-lesser-known actors : Sean Young portrays Rachael , an experimental replicant implanted with the memories of Tyrell 's niece , causing her to believe she is human ; Sammon , pp. 92-93 Nina Axelrod auditioned for the role .", |
|
"entity_list": [ |
|
{ |
|
"name": "Blade Runner", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Sean Young", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Rachael", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Tyrell", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Sammon", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Nina Axelrod", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Gerry Sussman , Eugene Charniak , Seymour Papert and Terry Winograd visited the University of Edinburgh in 1971 spreading the news about Micro-Planner and SHRDLU and casting doubt on the resolution uniform proof procedure approach that had been the mainstay of the Edinburgh Logicists .", |
|
"entity_list": [ |
|
{ |
|
"name": "Gerry Sussman", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Eugene Charniak", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Seymour Papert", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Terry Winograd", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "University of Edinburgh", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Micro-Planner", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "SHRDLU", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Edinburgh", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Walter 's work inspired subsequent generations of robotics researchers such as Rodney Brooks , Hans Moravec and Mark Tilden .", |
|
"entity_list": [ |
|
{ |
|
"name": "Walter", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "robotics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Rodney Brooks", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Hans Moravec", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Mark Tilden", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Subsequently , a similar GPU-based CNN by Alex Krizhevsky et al. won the ImageNet Large Scale Visual Recognition Challenge 2012 .", |
|
"entity_list": [ |
|
{ |
|
"name": "CNN", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Alex Krizhevsky", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "ImageNet Large Scale Visual Recognition Challenge 2012", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Commonly used loss functions for probabilistic classification include log loss and the Brier score between the predicted and the TRUE probability distributions .", |
|
"entity_list": [ |
|
{ |
|
"name": "loss functions", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "log loss", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Brier score", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE probability", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In May 2016 , NtechLab was admitted to the official testing of biometrics technology by NIST among the three Russian companies .", |
|
"entity_list": [ |
|
{ |
|
"name": "NtechLab", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "biometrics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "NIST", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Russian", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "However , floating-point numbers have only a certain amount of mathematical precision .", |
|
"entity_list": [] |
|
}, |
|
{ |
|
"sentence": "During 2015 , many of SenseTime 's papers were accepted into the Conference on Computer Vision and Pattern Recognition ( CVPR ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "SenseTime", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Conference on Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He co-developed optimal algorithms for Structure From Motion ( SFM , or Visual SLAM , simultaneous localization and mapping , in Robotics ; Best Paper Award at Conference on Computer Vision and Pattern Recognition 1998 ) , characterized its ambiguities ( David Marr Prize at ICCV 1999 ) , also characterized the identifiability and observability of visual-inertial sensor fusion ( Best Paper Award at Robotics 2015 ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Structure From Motion", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "SFM", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Visual SLAM", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "simultaneous localization and mapping", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Robotics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Best Paper Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Conference on Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "David Marr Prize", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "ICCV 1999", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Stephen H. Muggleton FBCS , FIET , Association for the Advancement of Artificial Intelligence ,", |
|
"entity_list": [ |
|
{ |
|
"name": "Stephen H. Muggleton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "FBCS", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "FIET", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Association for the Advancement of Artificial Intelligence", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Edge detection is a fundamental tool in image processing , machine vision and computer vision , particularly in the areas of feature detection and feature extraction .", |
|
"entity_list": [ |
|
{ |
|
"name": "Edge detection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "machine vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "feature detection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "feature extraction", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An example of this would be a variable such as outside temperature ( mathtemp / math ) , which in a given application might be recorded to several decimal places of precision ( depending on the sensing apparatus ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "outside temperature", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "decimal places of precision", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The returning judges are Fon Davis , Jessica Chobot , and Leland Melvin , as well as celebrity guest judges actor Clark Gregg , MythBusters host and former Battlebots builder Adam Savage , NFL tightend Vernon Davis , and YouTube star Michael Stevens a.k.a. Vsauce .", |
|
"entity_list": [ |
|
{ |
|
"name": "Fon Davis", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Jessica Chobot", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Leland Melvin", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Clark Gregg", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "MythBusters", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Battlebots", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Adam Savage", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "NFL", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Vernon Davis", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "YouTube", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Michael Stevens", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Vsauce", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "But these methods never won over the non-uniform internal-handcrafting Gaussian mixture model / Hidden Markov model ( GMM-HMM ) technology based on generative models of speech trained discriminatively .", |
|
"entity_list": [ |
|
{ |
|
"name": "Gaussian mixture model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Hidden Markov model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "GMM-HMM", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Software packages like MATLAB , GNU Octave , Scilab , and SciPy provide convenient ways to apply these different methods .", |
|
"entity_list": [ |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "GNU Octave", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Scilab", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "SciPy", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Linear predictive coding ( LPC ) , a speech processing algorithm , was first proposed by Fumitada Itakura of Nagoya University and Shuzo Saito of Nippon Telegraph and Telephone ( NTT ) in 1966 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Linear predictive coding", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "LPC", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "speech processing", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Fumitada Itakura", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Nagoya University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Shuzo Saito", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Nippon Telegraph and Telephone", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "NTT", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2006 , for the 25th anniversary of the algorithm , a workshop was organized at the International Conference on Computer Vision and Pattern Recognition ( CVPR ) to summarize the most recent contributions and variations to the original algorithm , mostly meant to improve the speed of the algorithm , the robustness and accuracy of the estimated solution and to decrease the dependency from user defined constants .", |
|
"entity_list": [ |
|
{ |
|
"name": "International Conference on Computer Vision and Pattern Recognition", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "CVPR", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The members went to the University of Debrecen , the Hungarian Academy of Sciences , Eötvös Loránd University , etc .", |
|
"entity_list": [ |
|
{ |
|
"name": "University of Debrecen", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Hungarian Academy of Sciences", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Eötvös Loránd University", |
|
"type": "university" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "To extend SVM to cases in which the data are not linearly separable , we introduce the loss function ,", |
|
"entity_list": [ |
|
{ |
|
"name": "SVM", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "loss function", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Logo is an educational programming language , designed in 1967 by Wally Feurzeig , Seymour Papert , and Cynthia Solomon .", |
|
"entity_list": [ |
|
{ |
|
"name": "Logo", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Wally Feurzeig", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Seymour Papert", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Cynthia Solomon", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Eyring Research Institute was instrumental to the U.S. Air Force Missile Directorate at Hill Air Force Base near Ogden , Utah to produce in top military secrecy , the Intelligent Systems Technology Software that was foundational to the later named Reagan Star Wars program .", |
|
"entity_list": [ |
|
{ |
|
"name": "Eyring Research Institute", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "U.S. Air Force Missile Directorate", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Hill Air Force Base", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Ogden", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Utah", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Intelligent Systems Technology Software", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Reagan Star Wars program", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Over the decades he has researched and developed emerging fields of computer science from compiler , programming languages and system architecture John F. Sowa and John Zachman ( 1992 ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "John F. Sowa", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "John Zachman", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Sobel operator , sometimes called the Sobel-Feldman operator or Sobel filter , is used in image processing and computer vision , particularly within edge detection algorithms where it creates an image emphasising edges .", |
|
"entity_list": [ |
|
{ |
|
"name": "Sobel operator", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Sobel-Feldman operator", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Sobel filter", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "edge detection algorithms", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "LDA is a supervised learning algorithm that utilizes the labels of the data , while PCA is an learning algorithm that ignores the labels .", |
|
"entity_list": [ |
|
{ |
|
"name": "LDA", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "supervised learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "PCA", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other linear classification algorithms include Winnow , support vector machine and logistic regression .", |
|
"entity_list": [ |
|
{ |
|
"name": "Winnow", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "support vector machine", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "logistic regression", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "VTK consists of a C + + class library and several interpreted interface layers including Tcl / Tk , Java , and Python .", |
|
"entity_list": [ |
|
{ |
|
"name": "VTK", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "C + +", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Tcl / Tk", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Also , text produced by processing spontaneous speech using automatic speech recognition and printed or handwritten text using optical character recognition contains processing noise .", |
|
"entity_list": [ |
|
{ |
|
"name": "automatic speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "optical character recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Miller wrote several books and directed the development of WordNet , an online word-linkage database usable by computer programs .", |
|
"entity_list": [ |
|
{ |
|
"name": "Miller", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Contemporary automata are represented by the works of Cabaret Mechanical Theatre in the United Kingdom , Dug North and Chomick + Meder , Arthur Ganson , Joe Jones in the United States , Le Défenseur du Temps by French artist Jacques Monestier , and François Junod in Switzerland .", |
|
"entity_list": [ |
|
{ |
|
"name": "automata", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Cabaret Mechanical Theatre", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "United Kingdom", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Dug North", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Chomick + Meder", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Arthur Ganson", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Joe Jones", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "United States", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Le Défenseur du Temps", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "French", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Jacques Monestier", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "François Junod", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Switzerland", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "MATLAB does include standard codefor / code and codewhile / code loops , but ( as in other similar applications such as R ) , using the vectorized notation is encouraged and is often faster to execute .", |
|
"entity_list": [ |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Pausch received two awards from Association for Computing Machinery in 2007 for his achievements in computing education : the Karl V. Karlstrom Outstanding Educator Award and the ACM SIGCSE Award for Outstanding Contributions to Computer Science Education .", |
|
"entity_list": [ |
|
{ |
|
"name": "Pausch", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Association for Computing Machinery", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "computing education", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Karl V. Karlstrom Outstanding Educator Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "ACM SIGCSE Award for Outstanding Contributions to Computer Science Education", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1960 , Devol personally sold the first Unimate robot , which was shipped in 1961 to General Motors .", |
|
"entity_list": [ |
|
{ |
|
"name": "Devol", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Unimate", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "General Motors", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Semantic networks are used in natural language processing applications such as semantic parsing .", |
|
"entity_list": [ |
|
{ |
|
"name": "Semantic networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "semantic parsing", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Some successful applications of deep learning are computer vision and speech recognition . Honglak Lee , Roger Grosse , Rajesh Ranganath , Andrew Y. Ng .", |
|
"entity_list": [ |
|
{ |
|
"name": "deep learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Honglak Lee", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Roger Grosse", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Rajesh Ranganath", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Andrew Y. Ng", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In addition to maintaining the Discovery One spacecraft systems during the interplanetary mission to Jupiter ( or Saturn in the novel ) , HAL is capable of speech synthesis , speech recognition , facial recognition , natural language processing , lip reading , art appreciation , Affective computing , automated reasoning , spacecraft piloting and playing chess .", |
|
"entity_list": [ |
|
{ |
|
"name": "Discovery One spacecraft systems", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Jupiter", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Saturn", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "HAL", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "speech synthesis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "facial recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "natural language processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "lip reading", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "art appreciation", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Affective computing", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "automated reasoning", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "spacecraft piloting", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "playing chess", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Dr. Julesz emigrated from Hungary to the United States following the 1956 Soviet invasion .", |
|
"entity_list": [ |
|
{ |
|
"name": "Dr. Julesz", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Hungary", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "the United States", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Soviet", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Sigmoid function activation functions use a second non-linearity for large inputs : math \\ phi ( v _ i ) = ( 1 + \\ exp ( -v _ i ) ) ^ { -1 } / math .", |
|
"entity_list": [ |
|
{ |
|
"name": "Sigmoid function", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "These probabilities are used to determine what the target is using a maximum likelihood decision .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood decision", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1984 he moved to the University of Konstanz and in 1990 to the University of Salzburg .", |
|
"entity_list": [ |
|
{ |
|
"name": "University of Konstanz", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "University of Salzburg", |
|
"type": "university" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Some popular fitness functions based on the confusion matrix include sensitivity / specificity , recall / precision , F-measure , Jaccard similarity , Matthews correlation coefficient , and cost / gain matrix which combines the costs and gains assigned to the 4 different types of classifications .", |
|
"entity_list": [ |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "sensitivity / specificity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "recall / precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "F-measure", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Jaccard similarity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "Matthews correlation coefficient", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "cost / gain matrix", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Common numerical programming environments such as MATLAB , SciLab , NumPy , Sklearn and the R language provide some of the simpler feature extraction techniques ( e.g. principal component analysis ) via built-in commands .", |
|
"entity_list": [ |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "SciLab", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "NumPy", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Sklearn", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "R language", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "principal component analysis", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Industrial robots have been implemented to collaborate with humans to perform industrial manufacturing tasks .", |
|
"entity_list": [ |
|
{ |
|
"name": "Industrial robots", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the first published paper on CGs , John F. Sowa applied them to a wide range of topics in artificial intelligence , computer science , and cognitive science .", |
|
"entity_list": [ |
|
{ |
|
"name": "CGs", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "John F. Sowa", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "cognitive science", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "NIST also differs from BLEU in its calculation of the brevity penalty , insofar as small variations in translation length do not impact the overall score as much .", |
|
"entity_list": [ |
|
{ |
|
"name": "NIST", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "BLEU", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "brevity penalty", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The IJCAI Award for Research Excellence is a biannual award given at the IJCAI conference to researcher in artificial intelligence as a recognition of excellence of their career .", |
|
"entity_list": [ |
|
{ |
|
"name": "IJCAI Award for Research Excellence", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "IJCAI", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Lenat was one of the original Fellows of the AAAI , and is the only individual to have on the Scientific Advisory Boards of both Microsoft and Apple .", |
|
"entity_list": [ |
|
{ |
|
"name": "Lenat", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "AAAI", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Scientific Advisory Boards of both Microsoft and Apple", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Autoencoders are trained to minimise reconstruction errors ( such as Mean squared error ) , often referred to as the loss :", |
|
"entity_list": [ |
|
{ |
|
"name": "Autoencoders", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Mean squared error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "loss", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An alternative to the use of the definitions is to consider general word-sense relatedness and to compute the similarity of each pair of word senses based on a given lexical knowledge base such as WordNet .", |
|
"entity_list": [ |
|
{ |
|
"name": "lexical knowledge base", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "TD-Lambda is a learning algorithm invented by Richard S. Sutton based on earlier work on temporal difference learning by Arthur Samuel .", |
|
"entity_list": [ |
|
{ |
|
"name": "TD-Lambda", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Richard S. Sutton", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Arthur Samuel", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In data mining and statistics , hierarchical clustering ( also called hierarchical cluster analysis or HCA ) is a method of cluster analysis which seeks to build a hierarchy of clusters .", |
|
"entity_list": [ |
|
{ |
|
"name": "data mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "hierarchical clustering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "hierarchical cluster analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "HCA", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "cluster analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The concept of deconvolution is widely used in the techniques of signal processing and image processing .", |
|
"entity_list": [ |
|
{ |
|
"name": "deconvolution", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "signal processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Cognitive maps serve the construction and accumulation of spatial knowledge , allowing the mind 's eye to visualize images in order to reduce cognitive load , enhance recall and learning of information .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cognitive maps", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "cognitive load", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": ", typically providing bindings to languages such as Python , C + + , Java ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Python", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C + +", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A voice-user interface ( VUI ) makes spoken human interaction with computers possible , using speech recognition to understand spoken commands and Question answering , and typically text to speech to play a reply .", |
|
"entity_list": [ |
|
{ |
|
"name": "voice-user interface", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "VUI", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Question answering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "text to speech", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Jess is a rule engine for the Java platform that was developed by Ernest Friedman-Hill of Sandia National .", |
|
"entity_list": [ |
|
{ |
|
"name": "Jess", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "rule engine", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Ernest Friedman-Hill", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Sandia National", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For multilayer perceptron s , where a hidden layer exists , more sophisticated algorithms such as backpropagation must be used .", |
|
"entity_list": [ |
|
{ |
|
"name": "multilayer perceptron", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "backpropagation", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Google Translate 's neural machine translation system uses a large end-to-end artificial neural network that attempts to perform deep learning , in particular , long short-term memory networks .", |
|
"entity_list": [ |
|
{ |
|
"name": "Google Translate", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "neural machine translation system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "end-to-end artificial neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "deep learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "long short-term memory networks", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Various methods for doing so were developed in the 1980s and early 1990s by Werbos , Williams , Robinson , Jürgen Schmidhuber , Sepp Hochreiter , Pearlmutter and others .", |
|
"entity_list": [ |
|
{ |
|
"name": "Werbos", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Williams", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Robinson", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Jürgen Schmidhuber", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Sepp Hochreiter", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Pearlmutter", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "| Apple Apple Inc originally licensed software from Nuance to provide speech recognition capability to its digital assistant Siri .", |
|
"entity_list": [ |
|
{ |
|
"name": "Apple", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Apple Inc", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Nuance", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Siri", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Columbia released several 3D westerns produced by Sam Katzman and directed by William Castle .", |
|
"entity_list": [ |
|
{ |
|
"name": "Columbia", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "3D westerns", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Sam Katzman", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "William Castle", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It incorporates knowledge and research in the computer science , linguistics and computer engineering fields .", |
|
"entity_list": [ |
|
{ |
|
"name": "computer science", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "linguistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer engineering", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Here is an example of R code :", |
|
"entity_list": [ |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The ROC curve is created by plotting the TRUE positive rate ( TPR ) against the FALSE positive rate ( FPR ) at various threshold settings .", |
|
"entity_list": [ |
|
{ |
|
"name": "ROC curve", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TRUE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "TPR", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FALSE positive rate", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "FPR", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Research stagnated after machine learning research by Marvin Minsky and Seymour Papert ( 1969 ) ,", |
|
"entity_list": [ |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Marvin Minsky", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Seymour Papert", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other programming environments that are used to build DAQ applications include ladder logic , Visual C + + , Visual Basic , LabVIEW , and MATLAB .", |
|
"entity_list": [ |
|
{ |
|
"name": "DAQ", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "ladder logic", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Visual C + +", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Visual Basic", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "LabVIEW", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "MATLAB", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The metric was designed to fix some of the problems found in the more popular BLEU metric , and also produce good correlation with human judgement at the sentence or segment level .", |
|
"entity_list": [ |
|
{ |
|
"name": "BLEU metric", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Techniques such as dynamic Markov Networks , Convolutional neural network and Long short-term memory are often employed to exploit the semantic correlations between consecutive video frames .", |
|
"entity_list": [ |
|
{ |
|
"name": "dynamic Markov Networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Convolutional neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Long short-term memory", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Mass-produced printed circuit board s ( PCBs ) are almost exclusively manufactured by pick-and-place robots , typically with SCARA manipulators , which remove tiny electronic component s from strips or trays , and place them on to PCBs with great accuracy .", |
|
"entity_list": [ |
|
{ |
|
"name": "printed circuit board", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "PCBs", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "pick-and-place robots", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "SCARA", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In the context of machine learning , where it is most widely applied today , LDA was rediscovered independently by David Blei , Andrew Ng and Michael I. Jordan in 2003 , and presented as a graphical model for topic discovery .", |
|
"entity_list": [ |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "LDA", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "David Blei", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Andrew Ng", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Michael I. Jordan", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "graphical model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "topic discovery", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The measured performance on test data of eight naive WSI across various tauopathies resulted in the recall , precision , and an F1 score of 0.92 , 0.72 , and 0.81 , respectively .", |
|
"entity_list": [ |
|
{ |
|
"name": "WSI", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "tauopathies", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "F1 score", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "With the help of advanced AR technologies ( e.g. adding computer vision , incorporating AR cameras into smartphone and object recognition ) the information about the surrounding real world of the user becomes interactive and digitally manipulated .", |
|
"entity_list": [ |
|
{ |
|
"name": "AR", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "object recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 2014 , Schmidhuber formed a company , Nnaisense , to work on commercial applications of artificial intelligence in fields such as finance , heavy industry and self-driving car s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Schmidhuber", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Nnaisense", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "artificial intelligence", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "self-driving car", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Not only does this alter the performance of all subsequent tests on the retained explanatory model , it may introduce bias and alter mean square error in estimation .", |
|
"entity_list": [ |
|
{ |
|
"name": "mean square error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Bigrams are used in most successful language model s for speech recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bigrams", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "language model", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "His research in cognitive psychology has won the Early Career Award ( 1984 ) and Boyd McCandless Award 1986 ) from the American Psychological Association , the Troland Research Award ( 1993 ) from the National Academy of Sciences , the Henry Dale Prize ( 2004 ) from the Royal Institution of Great Britain , and the George Miller Prize ( 2010 ) from the Cognitive Neuroscience Society .", |
|
"entity_list": [ |
|
{ |
|
"name": "cognitive psychology", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Early Career Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Boyd McCandless Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "American Psychological Association", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Troland Research Award", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "National Academy of Sciences", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Henry Dale Prize", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Royal Institution of Great Britain", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "George Miller Prize", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Cognitive Neuroscience Society", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An eigenface ( The approach of using eigenfaces for Facial recognition system was developed by Sirovich and Kirby ( 1987 ) and used by Matthew Turk and Alex Pentland in face classification . Turk , Matthew A and Pentland , Alex P. Face recognition using eigenfaces .", |
|
"entity_list": [ |
|
{ |
|
"name": "eigenface", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "eigenfaces", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Facial recognition system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Sirovich", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Kirby", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Matthew Turk", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Alex Pentland", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "face classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Turk , Matthew A", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Pentland , Alex P.", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Face recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A lexical dictionary such as WordNet can then be used for understanding the context .", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Hyponymy is the most frequently encoded relation among synsets used in lexical databases such as WordNet .", |
|
"entity_list": [ |
|
{ |
|
"name": "Hyponymy", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "synsets", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "OPeNDAP offers open-source libraries in C + + and Java , but many clients rely on community developed libraries such as libraries include embedded capabilities for retrieving ( array-style ) data from DAP servers .", |
|
"entity_list": [ |
|
{ |
|
"name": "OPeNDAP", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "C + +", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "DAP", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In that page , Samurai Damashii exaggerated the Senkousha as the crystallization of China 's four thousand years of scientific knowledge , commented on the crude design ( e.g. the Chinese Cannon on its crotch ) , and put its image among images of Honda ' s ASIMO and Sony ' s QRIO SDR-3X for juxtaposition .", |
|
"entity_list": [ |
|
{ |
|
"name": "Samurai Damashii", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Senkousha", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "China", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Chinese Cannon", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Honda", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "ASIMO", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Sony", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "QRIO SDR-3X", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "There are also many programming libraries that contain neural network functionality and that can be used in custom implementations ( such as TensorFlow , Theano , etc .", |
|
"entity_list": [ |
|
{ |
|
"name": "neural network", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "TensorFlow", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Theano", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He is a Fellow of the Association for Computing Machinery , IEEE , American Association for the Advancement of Science , IAPR and SPIE .", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for Computing Machinery", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IEEE", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "American Association for the Advancement of Science", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "IAPR", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "SPIE", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "A trial by RET in 2011 with Facial recognition system cameras mounted on trams made sure that people were banned from the city trams did not sneak on anyway .", |
|
"entity_list": [ |
|
{ |
|
"name": "RET", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Facial recognition system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The film , adapted from the popular Cole Porter Broadway musical , starred the MGM songbird team of Howard Keel and Kathryn Grayson as the leads , supported by Ann Miller , Keenan Wynn , Bobby Van , James Whitmore , Kurt Kasznar and Tommy Rall .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cole Porter", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Broadway", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Howard Keel", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Kathryn Grayson", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Ann Miller", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Keenan Wynn", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Bobby Van", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "James Whitmore", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Kurt Kasznar", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Tommy Rall", |
|
"type": "person" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Such applications should streamline the call flows , minimize prompts , eliminate unnecessary iterations and allow elaborate mixed initiative dialog system , which enable callers to enter several pieces of information in a single utterance and in any order or combination .", |
|
"entity_list": [ |
|
{ |
|
"name": "mixed initiative dialog system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As such , traditional gradient descent ( or Stochastic gradient descent ) methods can be adapted , where of taking a step in the direction of the function 's gradient , a step is taken in the direction of a vector selected from the function 's sub-gradient .", |
|
"entity_list": [ |
|
{ |
|
"name": "gradient descent", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Stochastic gradient descent", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "If it is assumed that distortion is measured by mean squared error , the distortion D , is given by :", |
|
"entity_list": [ |
|
{ |
|
"name": "mean squared error", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "distortion D", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "MLPs were a popular machine learning solution in the 1980s , finding applications in diverse fields such as speech recognition , image recognition , and machine translation software , Neural networks .", |
|
"entity_list": [ |
|
{ |
|
"name": "MLPs", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "image recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Neural networks", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Allen received his Ph.D. from the University of Toronto in 1979 , under the supervision of C. Raymond Perrault ,", |
|
"entity_list": [ |
|
{ |
|
"name": "Allen", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "University of Toronto", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "C. Raymond Perrault", |
|
"type": "researcher" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "OpenCV supports some models from deep learning frameworks like TensorFlow , Torch , PyTorch ( after converting to an ONNX model ) and Caffe according to a defined list of supported layers .", |
|
"entity_list": [ |
|
{ |
|
"name": "OpenCV", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "deep learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "TensorFlow", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Torch", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "PyTorch", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "ONNX", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Caffe", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Previously , Christensen was the Founding Chairman of European Robotics Research Network ( EURON ) and an IEEE Robotics and Automation Society Distinguished Lecturer in Robotics .", |
|
"entity_list": [ |
|
{ |
|
"name": "Christensen", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "European Robotics Research Network", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "EURON", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "IEEE Robotics and Automation Society", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Robotics", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "He received his master 's degree in mathematics from the Samarkand State University , Samarkand , Uzbek Soviet Socialist Republic in 1958 and Ph.D in statistics at the Institute of Control Sciences , Moscow in 1964 .", |
|
"entity_list": [ |
|
{ |
|
"name": "mathematics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Samarkand State University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Samarkand", |
|
"type": "location" |
|
}, |
|
{ |
|
"name": "Uzbek Soviet Socialist Republic", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Ph.D", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "statistics", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "Institute of Control Sciences", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Moscow", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Increasingly , however , work at Cycorp involves giving the Cyc system the ability to communicate with end users in natural language , and to assist with the ongoing knowledge formation process via machine learning and natural language understanding .", |
|
"entity_list": [ |
|
{ |
|
"name": "Cycorp", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Cyc system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "machine learning", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "natural language understanding", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For example , if the most suitable classifier for the problem is sought , the training dataset is used to train the candidate algorithms , the validation dataset is used to compare their performances and decide which one to take and , finally , the test dataset is used to obtain the performance characteristics such as accuracy , sensitivity , specificity , F-measure , and so on .", |
|
"entity_list": [ |
|
{ |
|
"name": "accuracy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "specificity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "F-measure", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Mean squared error is 0.15 .", |
|
"entity_list": [ |
|
{ |
|
"name": "Mean squared error", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In 1979 a Micromouse competition was organized by the IEEE as shown in the Spectrum magazine .", |
|
"entity_list": [ |
|
{ |
|
"name": "Micromouse competition", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "IEEE", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Spectrum", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Gabor space is very useful in image processing applications such as optical character recognition , iris recognition and fingerprint recognition .", |
|
"entity_list": [ |
|
{ |
|
"name": "Gabor space", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "image processing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "optical character recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "iris recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "fingerprint recognition", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "or via high-level interfaces to Java and Tcl .", |
|
"entity_list": [ |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Tcl", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In recent research , kernel-based methods such as support vector machine s have shown superior performance in supervised .", |
|
"entity_list": [ |
|
{ |
|
"name": "support vector machine", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "supervised", |
|
"type": "field" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "To illustrate the basic principles of bagging , below is an analysis on the relationship between ozone and temperature ( data from Rousseeuw and Leroy ( 1986 ) , analysis done in R ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "ozone", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Rousseeuw", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Leroy", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Denso Wave is a subsidiary that produces automatic identification products ( bar-code reader s and related products ) , industrial robot s and programmable logic controller s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Denso Wave", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "bar-code reader", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "industrial robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "programmable logic controller", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Where Bilingual evaluation understudy simply calculates n-gram precision adding equal weight to each one , NIST also calculates how informative a particular n-gram is .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bilingual evaluation understudy", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "n-gram precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "NIST", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "n-gram", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In particular , they are used during the calculation of likelihood of a tree ( in Bayesian and maximum likelihood approaches to tree estimation ) and they are used to estimate the evolutionary distance between sequences from the observed differences between the sequences .", |
|
"entity_list": [ |
|
{ |
|
"name": "Bayesian", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "maximum likelihood", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Audio Engineering Society recommends 48 kHz sampling rate for most applications but gives recognition to 44.1 kHz for Compact Disc ( CD ) and other consumer uses , 32 kHz for transmission-related applications , and 96 kHz for higher bandwidth or relaxed anti-aliasing filter ing .", |
|
"entity_list": [ |
|
{ |
|
"name": "Audio Engineering Society", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Compact Disc", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "CD", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "anti-aliasing filter", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Resources for affectivity of words and concepts have been made for WordNet { { cite journal", |
|
"entity_list": [ |
|
{ |
|
"name": "WordNet", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In red-green anaglyph , the audience was presented three reels of tests , which included rural scenes , test shots of Marie Doro , a segment of John B. Mason playing a number of passages from Jim the Penman ( a film released by Famous Players-Lasky that year , but not in 3D ) , Oriental dancers , and a reel of footage of Niagara Falls .", |
|
"entity_list": [ |
|
{ |
|
"name": "red-green anaglyph", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Marie Doro", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "John B. Mason", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Jim the Penman", |
|
"type": "person" |
|
}, |
|
{ |
|
"name": "Famous Players-Lasky", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Niagara Falls", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "This is a particular way of implementing maximum likelihood estimation for this problem .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood estimation", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Crawler-friendly Web Servers , and it integrates the features of sitemaps and RSS feeds into a decentralized mechanism for computational biologists and bio-informaticians to openly broadcast and retrieve meta-data about biomedical resources .", |
|
"entity_list": [ |
|
{ |
|
"name": "Crawler-friendly Web Servers", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "RSS", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "It is covered by American National Standards Institute / NISO standard Z39.50 , and International Organization for Standardization standard 23950 .", |
|
"entity_list": [ |
|
{ |
|
"name": "American National Standards Institute / NISO standard Z39.50", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "International Organization for Standardization standard 23950", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The encoder and decoder are trained to take a phrase and reproduce the one-hot distribution of a corresponding paraphrase by minimizing perplexity using simple stochastic gradient descent .", |
|
"entity_list": [ |
|
{ |
|
"name": "one-hot distribution", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "perplexity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "stochastic gradient descent", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other typical applications of pattern recognition techniques are automatic speech recognition , classification of text into several categories ( e.g. , spam / non-spam email messages ) , the handwriting recognition on postal envelopes , automatic recognition of images of human faces , or handwriting image extraction from medical forms .", |
|
"entity_list": [ |
|
{ |
|
"name": "pattern recognition", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "automatic speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "classification of text into several categories", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "handwriting recognition on postal envelopes", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "automatic recognition of images of human faces", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "handwriting image extraction from medical forms", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Artificial neural networks have been used on a variety of tasks , including computer vision , speech recognition , machine translation , social network filtering , playing board and video games and medical diagnosis .", |
|
"entity_list": [ |
|
{ |
|
"name": "Artificial neural networks", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "computer vision", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "speech recognition", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "machine translation", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "social network filtering", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "playing board and video games", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "medical diagnosis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Examples include Salford Systems CART ( which licensed the proprietary code of the original CART authors ) , IBM SPSS Modeler , RapidMiner , SAS Enterprise Miner , Matlab , R ( an open-source software environment for statistical computing , which includes several CART implementations such as rpart , party and randomForest packages ) , Weka ( a free and open-source data-mining suite , contains many decision tree algorithms ) , Orange , KNIME , Microsoft SQL Server programming language ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Salford Systems", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "CART", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "IBM", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "SPSS Modeler", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "RapidMiner", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "SAS Enterprise Miner", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Matlab", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "statistical computing", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "rpart", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "party", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "randomForest", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Weka", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "data-mining", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "decision tree", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Orange", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "KNIME", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Microsoft SQL Server", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Linear predictive coding ( LPC ) was first developed by Fumitada Itakura of Nagoya University and Shuzo Saito of Nippon Telegraph and Telephone ( NTT ) in 1966 , and then further developed by Bishnu S. Atal and Manfred R. Schroeder at Bell Labs during the early-to-mid-1970s , becoming a basis for the first speech synthesizer DSP chips in the late 1970s .", |
|
"entity_list": [ |
|
{ |
|
"name": "Linear predictive coding", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "LPC", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Fumitada Itakura", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Nagoya University", |
|
"type": "university" |
|
}, |
|
{ |
|
"name": "Shuzo Saito", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Nippon Telegraph and Telephone", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "NTT", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Bishnu S. Atal", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Manfred R. Schroeder", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Bell Labs", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "speech synthesizer DSP chips", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "An F-score is a combination of the precision and the recall , providing a single score .", |
|
"entity_list": [ |
|
{ |
|
"name": "F-score", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Image analysis tasks can be as simple as reading bar code d tags or as sophisticated as facial recognition system .", |
|
"entity_list": [ |
|
{ |
|
"name": "Image analysis", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "reading bar code d tags", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "facial recognition system", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The special case of linear support-vector machines can be solved more efficiently by the same kind of algorithms to optimize its close cousin , logistic regression ; this class of algorithms includes Stochastic gradient descent ( e.g. , PEGASOS ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "support-vector machines", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "logistic regression", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "Stochastic gradient descent", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "PEGASOS", |
|
"type": "algorithm" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "When Siri on an iOS device is asked Do you have a pet ? , one the responses is I used to have an AIBO .", |
|
"entity_list": [ |
|
{ |
|
"name": "Siri", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "iOS", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "AIBO", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In information retrieval , the positive predictive value is called precision , and sensitivity is called recall .", |
|
"entity_list": [ |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "positive predictive value", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "recall", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In particular , his research focused on areas such as text mining ( extraction , categorization , novelty detection ) and in new theoretical frameworks such as a unified utility-based theory bridging information retrieval , Automatic summarization , free-text Question Answering and related tasks .", |
|
"entity_list": [ |
|
{ |
|
"name": "text mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "categorization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "novelty detection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "Automatic summarization", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "free-text Question Answering", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Delta robot s have base-mounted rotary actuator s that move a light , stiff , parallelogram arm .", |
|
"entity_list": [ |
|
{ |
|
"name": "Delta robot", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "rotary actuator", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "parallelogram arm", |
|
"type": "else" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The four outcomes can be formulated in a 2 × 2 contingency table or confusion matrix , as follows :", |
|
"entity_list": [ |
|
{ |
|
"name": "2 × 2 contingency table", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "confusion matrix", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The actual data mining task is the semi-automatic or automatic analysis of large quantities of data to extract unknown , interesting patterns such as groups of data records ( cluster analysis ) , unusual records ( anomaly detection ) , and dependencies ( association rule mining , sequential pattern mining ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "data mining", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "cluster analysis", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "anomaly detection", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "association rule mining", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "sequential pattern mining", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "For a recommender system , sentiment analysis has been proven to be a valuable technique .", |
|
"entity_list": [ |
|
{ |
|
"name": "recommender system", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "sentiment analysis", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "By chance , the Germans had chosen the operating frequency of the Wotan system very badly ; it operated on 45 MHz , which just happened to be the frequency of the powerful-but-dormant BBC television transmitter at Alexandra Palace .", |
|
"entity_list": [ |
|
{ |
|
"name": "Germans", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "Wotan", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "BBC", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Alexandra Palace", |
|
"type": "location" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In Semantic Web applications , and in relatively popular applications of RDF like RSS and FOAF ( Friend a Friend ) , resources tend to be represented by URIs that intentionally denote , and can be used to access , actual data on the World Wide Web .", |
|
"entity_list": [ |
|
{ |
|
"name": "Semantic Web applications", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "RDF", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "RSS", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "FOAF", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "Friend a Friend", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "URIs", |
|
"type": "else" |
|
}, |
|
{ |
|
"name": "World Wide Web", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Association for the Advancement of Artificial Intelligence has studied this topic in depth", |
|
"entity_list": [ |
|
{ |
|
"name": "Association for the Advancement of Artificial Intelligence", |
|
"type": "conference" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Starting as a curiosity , the speech system of Apple Macintosh has evolved into a fully supported program PlainTalk , for people with vision problems .", |
|
"entity_list": [ |
|
{ |
|
"name": "speech system of Apple Macintosh", |
|
"type": "product" |
|
}, |
|
{ |
|
"name": "PlainTalk", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Other areas of usage for ontologies within NLP include information retrieval , information extraction and automatic summarization .", |
|
"entity_list": [ |
|
{ |
|
"name": "NLP", |
|
"type": "field" |
|
}, |
|
{ |
|
"name": "information retrieval", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "information extraction", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "automatic summarization", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "The Institute has collaborated closely with the Janelia Farm Campus of Howard Hughes Medical Institute , the Allen Institute for Brain Science and the National Institutes of Health to develop better methods of reconstructing neuronal architectures .", |
|
"entity_list": [ |
|
{ |
|
"name": "Janelia Farm Campus of Howard Hughes Medical Institute", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Allen Institute for Brain Science", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "National Institutes of Health", |
|
"type": "organization" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Recently , Google announced that Google Translate translates roughly enough text to fill 1 million books in one day ( 2012 ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "Google", |
|
"type": "organization" |
|
}, |
|
{ |
|
"name": "Google Translate", |
|
"type": "product" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Events are held worldwide , and are most popular in the United Kingdom , United States , Japan , Singapore , India , South Korea and becoming popular in subcontinent countries such as Sri Lanka .", |
|
"entity_list": [ |
|
{ |
|
"name": "United Kingdom", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "United States", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Japan", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Singapore", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "India", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "South Korea", |
|
"type": "country" |
|
}, |
|
{ |
|
"name": "Sri Lanka", |
|
"type": "country" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "These packages are developed primarily in R , and sometimes in Java , C , C + + , and Fortran .", |
|
"entity_list": [ |
|
{ |
|
"name": "R", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Java", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "C + +", |
|
"type": "program language" |
|
}, |
|
{ |
|
"name": "Fortran", |
|
"type": "program language" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "As part of the 2006 European Conference on Computer Vision ( ECCV ) , Dalal and Triggs teamed up with Cordelia Schmid to apply HOG detectors to the problem of human detection in films and videos .", |
|
"entity_list": [ |
|
{ |
|
"name": "2006 European Conference on Computer Vision", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "ECCV", |
|
"type": "conference" |
|
}, |
|
{ |
|
"name": "Dalal", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Triggs", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "Cordelia Schmid", |
|
"type": "researcher" |
|
}, |
|
{ |
|
"name": "HOG detectors", |
|
"type": "algorithm" |
|
}, |
|
{ |
|
"name": "human detection in films and videos", |
|
"type": "task" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "In addition to sensitivity and specificity , the performance of a binary classification test can be measured with positive predictive value ( PPV ) , also known as precision , and negative predictive value ( NPV ) .", |
|
"entity_list": [ |
|
{ |
|
"name": "sensitivity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "specificity", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "binary classification", |
|
"type": "task" |
|
}, |
|
{ |
|
"name": "positive predictive value", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "PPV", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "precision", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "negative predictive value", |
|
"type": "metrics" |
|
}, |
|
{ |
|
"name": "NPV", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Such models may given partial credit for overlapping matches ( such as using the Jaccard index criterion .", |
|
"entity_list": [ |
|
{ |
|
"name": "Jaccard index criterion", |
|
"type": "metrics" |
|
} |
|
] |
|
}, |
|
{ |
|
"sentence": "Further , in the case of estimation based on a single sample , it demonstrates philosophical issues and possible misunderstandings in the use of maximum likelihood estimators and likelihood functions .", |
|
"entity_list": [ |
|
{ |
|
"name": "maximum likelihood estimators and likelihood functions", |
|
"type": "metrics" |
|
} |
|
] |
|
} |
|
] |