diff --git "a/data/datasets/CrossNER/ai/test.json" "b/data/datasets/CrossNER/ai/test.json" new file mode 100644--- /dev/null +++ "b/data/datasets/CrossNER/ai/test.json" @@ -0,0 +1,9278 @@ +[ + { + "sentence": "Typical generative model approaches include naive Bayes classifier s , Gaussian mixture model s , variational autoencoders and others .", + "entity_list": [ + { + "name": "naive Bayes classifier", + "type": "algorithm" + }, + { + "name": "Gaussian mixture model", + "type": "algorithm" + }, + { + "name": "variational autoencoders", + "type": "algorithm" + } + ] + }, + { + "sentence": "Finally , every other year , ELRA organizes a major conference LREC , the International Language Resources and Evaluation Conference .", + "entity_list": [ + { + "name": "ELRA", + "type": "conference" + }, + { + "name": "LREC", + "type": "conference" + }, + { + "name": "International Language Resources and Evaluation Conference", + "type": "conference" + } + ] + }, + { + "sentence": "The task is usually to derive the maximum likelihood estimate of the parameters of the HMM given the of output sequences .", + "entity_list": [ + { + "name": "maximum likelihood estimate", + "type": "algorithm" + }, + { + "name": "HMM", + "type": "algorithm" + } + ] + }, + { + "sentence": "Unlike neural network s and Support vector machine , the AdaBoost training process selects only those features known to improve the predictive power of the model , reducing dimensionality and potentially improving execution time as irrelevant features need not be computed .", + "entity_list": [ + { + "name": "neural network", + "type": "algorithm" + }, + { + "name": "Support vector machine", + "type": "algorithm" + }, + { + "name": "AdaBoost", + "type": "algorithm" + } + ] + }, + { + "sentence": "Troponymy is one of the possible relations between verb s in the semantic network of the WordNet database .", + "entity_list": [ + { + "name": "Troponymy", + "type": "else" + }, + { + "name": "semantic network", + "type": "else" + }, + { + "name": "WordNet database", + "type": "product" + } + ] + }, + { + "sentence": "A frame language is a technology used for knowledge representation in artificial intelligence .", + "entity_list": [ + { + "name": "knowledge representation", + "type": "task" + }, + { + "name": "artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "NIST also differs from Bilingual evaluation understudy in its calculation of the brevity penalty insofar as small variations in translation length do not impact the overall score as much .", + "entity_list": [ + { + "name": "NIST", + "type": "metrics" + }, + { + "name": "Bilingual evaluation understudy", + "type": "metrics" + }, + { + "name": "brevity penalty", + "type": "else" + } + ] + }, + { + "sentence": "The model is initially fit on a training dataset , The model ( e.g. a neural net or a naive Bayes classifier ) is trained on the training dataset using a supervised learning method , for example using optimization methods such as gradient descent or stochastic gradient descent .", + "entity_list": [ + { + "name": "neural net", + "type": "algorithm" + }, + { + "name": "naive Bayes classifier", + "type": "algorithm" + }, + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "gradient descent", + "type": "algorithm" + }, + { + "name": "stochastic gradient descent", + "type": "algorithm" + } + ] + }, + { + "sentence": "FrameNet has been used in applications like question answering , paraphrasing , recognizing textual entailment , and information extraction , either directly or by means of Semantic Role Labeling tools .", + "entity_list": [ + { + "name": "FrameNet", + "type": "product" + }, + { + "name": "question answering", + "type": "task" + }, + { + "name": "paraphrasing", + "type": "task" + }, + { + "name": "recognizing textual entailment", + "type": "task" + }, + { + "name": "information extraction", + "type": "task" + }, + { + "name": "Semantic Role Labeling", + "type": "task" + } + ] + }, + { + "sentence": "This would include programs such as data analysis and extraction tools , spreadsheets ( e.g. Excel ) , databases ( e.g. Access ) , statistical analysis ( e.g. SAS ) , generalized audit software ( e.g. ACL , Arbutus , EAS ) , business intelligence ( e.g. Crystal Reports and Business Objects ) , etc .", + "entity_list": [ + { + "name": "data analysis", + "type": "field" + }, + { + "name": "spreadsheets", + "type": "else" + }, + { + "name": "Excel", + "type": "product" + }, + { + "name": "databases", + "type": "else" + }, + { + "name": "Access", + "type": "product" + }, + { + "name": "statistical analysis", + "type": "field" + }, + { + "name": "SAS", + "type": "product" + }, + { + "name": "generalized audit software", + "type": "else" + }, + { + "name": "ACL", + "type": "product" + }, + { + "name": "Arbutus", + "type": "product" + }, + { + "name": "EAS", + "type": "product" + }, + { + "name": "business intelligence", + "type": "else" + }, + { + "name": "Crystal Reports", + "type": "product" + }, + { + "name": "Business Objects", + "type": "product" + } + ] + }, + { + "sentence": "Rethink Robotics - founded by Rodney Brooks , previously with iRobot - introduced Baxter in September 2012 ; as an industrial robot designed to safely interact with neighboring human workers , and be programmable for performing simple tasks .", + "entity_list": [ + { + "name": "Rethink Robotics", + "type": "organization" + }, + { + "name": "Rodney Brooks", + "type": "researcher" + }, + { + "name": "iRobot", + "type": "organization" + }, + { + "name": "Baxter", + "type": "product" + }, + { + "name": "industrial robot", + "type": "product" + } + ] + }, + { + "sentence": "Typical text mining tasks include text categorization , text clustering , concept / entity extraction , production of granular taxonomies , sentiment analysis , document summarization , and entity relation modeling ( i.e. , learning relations between named entity recognition ) .", + "entity_list": [ + { + "name": "text mining", + "type": "field" + }, + { + "name": "text categorization", + "type": "task" + }, + { + "name": "text clustering", + "type": "task" + }, + { + "name": "concept / entity extraction", + "type": "task" + }, + { + "name": "production of granular taxonomies", + "type": "task" + }, + { + "name": "sentiment analysis", + "type": "task" + }, + { + "name": "document summarization", + "type": "task" + }, + { + "name": "entity relation modeling", + "type": "task" + }, + { + "name": "named entity recognition", + "type": "task" + } + ] + }, + { + "sentence": "Nonetheless , stemming reduces precision , or TRUE negative rate , for such systems .", + "entity_list": [ + { + "name": "precision", + "type": "metrics" + }, + { + "name": "TRUE negative rate", + "type": "metrics" + } + ] + }, + { + "sentence": "A special case of keyword spotting is wake word ( also called hot word ) detection used by personal digital assistants such as Alexa or Siri to wake up when their name is spoken .", + "entity_list": [ + { + "name": "keyword spotting", + "type": "task" + }, + { + "name": "wake word", + "type": "else" + }, + { + "name": "hot word", + "type": "else" + }, + { + "name": "Alexa", + "type": "product" + }, + { + "name": "Siri", + "type": "product" + } + ] + }, + { + "sentence": "Prova is an open source programming language that combines Prolog with Java .", + "entity_list": [ + { + "name": "Prova", + "type": "program language" + }, + { + "name": "Prolog", + "type": "program language" + }, + { + "name": "Java", + "type": "program language" + } + ] + }, + { + "sentence": "In 1987 , Tocibai Machine , a subsidiary of Toshiba , was accused of illegally selling CNC milling s used to produce very quiet submarine propellers to the Soviet Union in violation of the CoCom agreement , an international embargo on certain countries to COMECON countries .", + "entity_list": [ + { + "name": "Tocibai Machine", + "type": "organization" + }, + { + "name": "Toshiba", + "type": "organization" + }, + { + "name": "CNC milling", + "type": "product" + }, + { + "name": "Soviet Union", + "type": "country" + }, + { + "name": "CoCom", + "type": "organization" + }, + { + "name": "COMECON", + "type": "else" + } + ] + }, + { + "sentence": "Engelberger 's most famous co-invention , the Unimate industrial robotic arm , was among the first inductees into the Robot Hall of Fame in 2003 .", + "entity_list": [ + { + "name": "Engelberger", + "type": "researcher" + }, + { + "name": "Unimate industrial robotic arm", + "type": "product" + }, + { + "name": "Robot Hall of Fame", + "type": "location" + } + ] + }, + { + "sentence": "Originally controlled via static html web pages using CGI , work by Dalton saw the introduction of an augmented reality Java -based interface that met with limited success .", + "entity_list": [ + { + "name": "static html", + "type": "else" + }, + { + "name": "CGI", + "type": "else" + }, + { + "name": "Dalton", + "type": "person" + }, + { + "name": "augmented reality", + "type": "field" + }, + { + "name": "Java", + "type": "program language" + } + ] + }, + { + "sentence": "The first publication about the LMF specification as it has been ratified by ISO ( this paper became ( in 2015 ) the 9th most cited paper within the LREC conferences from LREC papers ) :", + "entity_list": [ + { + "name": "LMF specification", + "type": "task" + }, + { + "name": "ISO", + "type": "organization" + }, + { + "name": "LREC", + "type": "conference" + } + ] + }, + { + "sentence": "A confusion matrix or matching matrix is often used as a tool to validate the accuracy of k -NN classification .", + "entity_list": [ + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "k -NN classification", + "type": "algorithm" + } + ] + }, + { + "sentence": "Decision tree learning is one of the predictive modeling approaches used in statistics , data mining and machine learning .", + "entity_list": [ + { + "name": "Decision tree", + "type": "algorithm" + }, + { + "name": "statistics", + "type": "field" + }, + { + "name": "data mining", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + } + ] + }, + { + "sentence": "At runtime , the target prosody of a sentence is superimposed on these minimal units by means of signal processing techniques such as linear predictive coding , PSOLA", + "entity_list": [ + { + "name": "prosody", + "type": "else" + }, + { + "name": "signal processing", + "type": "field" + }, + { + "name": "linear predictive coding", + "type": "algorithm" + } + ] + }, + { + "sentence": "This approach utilized artificial intelligence and machine learning to allow researchers to visibly compare conventional and thermal facial imagery .", + "entity_list": [ + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "facial imagery", + "type": "task" + } + ] + }, + { + "sentence": "In computer science , evolutionary computation is a family of algorithms for global optimization inspired by biological evolution , and the subfield of artificial intelligence and soft computing studying these algorithms .", + "entity_list": [ + { + "name": "computer science", + "type": "field" + }, + { + "name": "evolutionary computation", + "type": "algorithm" + }, + { + "name": "global optimization", + "type": "task" + }, + { + "name": "biological evolution", + "type": "else" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "soft computing", + "type": "field" + } + ] + }, + { + "sentence": "For instance , one can combine some measure based on the confusion matrix with the mean squared error evaluated between the raw model outputs and the actual values .", + "entity_list": [ + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "The majority are results of the word2vec model developed by Mikolov et al or variants of word2vec .", + "entity_list": [ + { + "name": "word2vec model", + "type": "product" + }, + { + "name": "Mikolov", + "type": "researcher" + }, + { + "name": "word2vec", + "type": "product" + } + ] + }, + { + "sentence": "It was during this time that a total of 43 publications were recognized by the CVPR and the International Conference on Computer Vision ( ICCV ) .", + "entity_list": [ + { + "name": "CVPR", + "type": "conference" + }, + { + "name": "International Conference on Computer Vision", + "type": "conference" + }, + { + "name": "ICCV", + "type": "conference" + } + ] + }, + { + "sentence": "The AIBO has seen much use as an inexpensive platform for artificial intelligence education and research , because integrates a computer , Computer vision , and articulators in a package vastly cheaper than conventional research robots .", + "entity_list": [ + { + "name": "AIBO", + "type": "product" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "Computer vision", + "type": "field" + } + ] + }, + { + "sentence": "She served as Program Chair of International Conference on Computer Vision 2021 .", + "entity_list": [ + { + "name": "International Conference on Computer Vision 2021", + "type": "conference" + } + ] + }, + { + "sentence": "Scheinman , after receiving a fellowship from Unimation to develop his designs , sold those designs to Unimation who further developed them with support from General Motors and later marketed it as the Programmable Universal Machine for Assembly ( PUMA ) .", + "entity_list": [ + { + "name": "Scheinman", + "type": "researcher" + }, + { + "name": "Unimation", + "type": "organization" + }, + { + "name": "General Motors", + "type": "organization" + }, + { + "name": "Programmable Universal Machine for Assembly", + "type": "product" + }, + { + "name": "PUMA", + "type": "product" + } + ] + }, + { + "sentence": "An overview of calibration methods for binary classification and multiclass classification classification tasks is given by Gebel ( 2009 )", + "entity_list": [ + { + "name": "binary classification", + "type": "task" + }, + { + "name": "multiclass classification classification tasks", + "type": "task" + }, + { + "name": "Gebel", + "type": "researcher" + } + ] + }, + { + "sentence": "He is involved in fields such as optical character recognition ( OCR ) , speech synthesis , speech recognition technology , and electronic keyboard instruments .", + "entity_list": [ + { + "name": "optical character recognition", + "type": "task" + }, + { + "name": "OCR", + "type": "task" + }, + { + "name": "speech synthesis", + "type": "task" + }, + { + "name": "speech recognition", + "type": "task" + } + ] + }, + { + "sentence": "For more recent and state-of-the-art techniques , Kaldi toolkit can be used .", + "entity_list": [ + { + "name": "Kaldi toolkit", + "type": "product" + } + ] + }, + { + "sentence": "Johnson-Laird is a Fellow of the American Philosophical Society , a Fellow of the Royal Society , a Fellow of the British Academy , a William James Fellow of the Association for Psychological Science , and a Fellow of the Cognitive Science Society .", + "entity_list": [ + { + "name": "Johnson-Laird", + "type": "researcher" + }, + { + "name": "American Philosophical Society", + "type": "organization" + }, + { + "name": "Royal Society", + "type": "organization" + }, + { + "name": "British Academy", + "type": "organization" + }, + { + "name": "William James", + "type": "researcher" + }, + { + "name": "Association for Psychological Science", + "type": "organization" + }, + { + "name": "Cognitive Science Society", + "type": "organization" + } + ] + }, + { + "sentence": "At the IEEE International Conference on Image Processing in 2010 , Rui Hu , Mark Banard , and John Collomosse extended the HOG descriptor for use in sketch based image retrieval ( SBIR ) .", + "entity_list": [ + { + "name": "IEEE International Conference on Image Processing", + "type": "conference" + }, + { + "name": "Rui Hu", + "type": "researcher" + }, + { + "name": "Mark Banard", + "type": "researcher" + }, + { + "name": "John Collomosse", + "type": "researcher" + }, + { + "name": "HOG descriptor", + "type": "algorithm" + }, + { + "name": "sketch based image retrieval", + "type": "task" + }, + { + "name": "SBIR", + "type": "task" + } + ] + }, + { + "sentence": "BLEU uses a modified form of precision to compare a candidate translation against multiple reference translations .", + "entity_list": [ + { + "name": "BLEU", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + } + ] + }, + { + "sentence": "For the case of a general base space math ( Y , \\ mathcal { B } , \\ nu ) / math ( i.e. a base space which is not countable ) , one typically considers the relative entropy .", + "entity_list": [ + { + "name": "relative entropy", + "type": "metrics" + } + ] + }, + { + "sentence": "As of October 2011 , the already-existing partnerships with the United States ' National Park Service ( NPS ) , the United Kingdom 's Historic Scotland ( HS ) , World Monuments Fund , and Mexico 's Instituto Nacional de Antropología y Historia ( INAH ) had been greatly expanded , , CyArk website", + "entity_list": [ + { + "name": "United States", + "type": "country" + }, + { + "name": "National Park Service", + "type": "organization" + }, + { + "name": "NPS", + "type": "organization" + }, + { + "name": "United Kingdom", + "type": "country" + }, + { + "name": "Historic Scotland", + "type": "organization" + }, + { + "name": "HS", + "type": "organization" + }, + { + "name": "World Monuments Fund", + "type": "organization" + }, + { + "name": "Mexico", + "type": "country" + }, + { + "name": "Instituto Nacional de Antropología y Historia", + "type": "organization" + }, + { + "name": "INAH", + "type": "organization" + }, + { + "name": "CyArk", + "type": "else" + } + ] + }, + { + "sentence": "Kernel SVMs are available in many machine-learning toolkits , including LIBSVM , MATLAB , and others .", + "entity_list": [ + { + "name": "Kernel SVMs", + "type": "algorithm" + }, + { + "name": "machine-learning", + "type": "field" + }, + { + "name": "LIBSVM", + "type": "product" + }, + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "The 2009 Loebner Prize Competition was held September 6 , 2009 at the Brighton Centre , Brighton UK in conjunction with the Interspeech 2009 conference .", + "entity_list": [ + { + "name": "Loebner Prize Competition", + "type": "else" + }, + { + "name": "Brighton Centre", + "type": "location" + }, + { + "name": "Brighton", + "type": "location" + }, + { + "name": "UK", + "type": "country" + }, + { + "name": "Interspeech 2009 conference", + "type": "conference" + } + ] + }, + { + "sentence": "The humanoid QRIO robot was designed as the successor to AIBO , and runs the same base R-CODE Aperios operating system .", + "entity_list": [ + { + "name": "QRIO robot", + "type": "product" + }, + { + "name": "AIBO", + "type": "product" + }, + { + "name": "R-CODE", + "type": "product" + }, + { + "name": "Aperios operating system", + "type": "product" + } + ] + }, + { + "sentence": "Speech waveforms are generated from HMMs themselves based on the maximum likelihood criterion .", + "entity_list": [ + { + "name": "Speech waveforms", + "type": "else" + }, + { + "name": "HMMs", + "type": "algorithm" + }, + { + "name": "maximum likelihood", + "type": "algorithm" + } + ] + }, + { + "sentence": "Google Translate is a free multilingual statistical machine translation and neural machine translation service developed by Google , to translate text and websites from one language into another .", + "entity_list": [ + { + "name": "Google Translate", + "type": "product" + }, + { + "name": "multilingual statistical machine translation", + "type": "task" + }, + { + "name": "neural machine translation", + "type": "task" + }, + { + "name": "Google", + "type": "product" + } + ] + }, + { + "sentence": "Skeletons are widely used in computer vision , image analysis , pattern recognition and digital image processing for purposes such as optical character recognition , fingerprint recognition , visual inspection or compression .", + "entity_list": [ + { + "name": "computer vision", + "type": "field" + }, + { + "name": "image analysis", + "type": "field" + }, + { + "name": "pattern recognition", + "type": "field" + }, + { + "name": "digital image processing", + "type": "field" + }, + { + "name": "optical character recognition", + "type": "task" + }, + { + "name": "fingerprint recognition", + "type": "task" + }, + { + "name": "visual inspection or compression", + "type": "task" + } + ] + }, + { + "sentence": "The ImageNet Large Scale Visual Recognition Challenge is a benchmark in object classification and detection , with millions of images and hundreds of object classes .", + "entity_list": [ + { + "name": "ImageNet Large Scale Visual Recognition Challenge", + "type": "conference" + }, + { + "name": "object classification and detection", + "type": "task" + } + ] + }, + { + "sentence": "Bengio , together with Geoffrey Hinton and Yann LeCun , are referred to by some as the Godfathers of AI and Godfathers of Deep Learning .", + "entity_list": [ + { + "name": "Bengio", + "type": "researcher" + }, + { + "name": "Geoffrey Hinton", + "type": "researcher" + }, + { + "name": "Yann LeCun", + "type": "researcher" + }, + { + "name": "Godfathers of AI", + "type": "else" + }, + { + "name": "Godfathers of Deep Learning", + "type": "else" + } + ] + }, + { + "sentence": "He is a Life Fellow of IEEE .", + "entity_list": [ + { + "name": "IEEE", + "type": "organization" + } + ] + }, + { + "sentence": "NSA Bethesda is responsible for base operational support for its major tenant , the Walter Reed National Military Medical Center .", + "entity_list": [ + { + "name": "NSA Bethesda", + "type": "organization" + }, + { + "name": "Walter Reed National Military Medical Center", + "type": "organization" + } + ] + }, + { + "sentence": "The three major learning paradigms are supervised learning , unsupervised learning and reinforcement learning .", + "entity_list": [ + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "unsupervised learning", + "type": "field" + }, + { + "name": "reinforcement learning", + "type": "field" + } + ] + }, + { + "sentence": "Examples include control , planning and scheduling , the ability to answer diagnostic and consumer questions , handwriting recognition , natural language understanding , speech recognition and facial recognition .", + "entity_list": [ + { + "name": "control", + "type": "task" + }, + { + "name": "planning and scheduling", + "type": "task" + }, + { + "name": "answer diagnostic and consumer questions", + "type": "task" + }, + { + "name": "handwriting recognition", + "type": "task" + }, + { + "name": "natural language understanding", + "type": "task" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "facial recognition", + "type": "task" + } + ] + }, + { + "sentence": "In 1991 he was elected as a fellow of the Association for the Advancement of Artificial Intelligence ( 1990 , founding fellow ) .", + "entity_list": [ + { + "name": "Association for the Advancement of Artificial Intelligence", + "type": "conference" + } + ] + }, + { + "sentence": "However , by formulating the problem as the solution of a Toeplitz matrix and using Levinson recursion , we can relatively quickly estimate a filter with the smallest mean squared error possible .", + "entity_list": [ + { + "name": "Toeplitz matrix", + "type": "else" + }, + { + "name": "Levinson recursion", + "type": "algorithm" + }, + { + "name": "mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "In July 2011 the 15th edition of Campus Party Spain will be held at the City of Arts and Sciences in Valencia .", + "entity_list": [ + { + "name": "15th edition of Campus Party Spain", + "type": "conference" + }, + { + "name": "City of Arts and Sciences", + "type": "location" + }, + { + "name": "Valencia", + "type": "location" + } + ] + }, + { + "sentence": "Often this is generally only possible at the very end of complicated games such as chess or go , since it is not computationally feasible to look ahead as far as the completion of the game , except towards the end , and instead , positions are given finite values as estimates of the degree of belief that they will lead to a win for one player or another .", + "entity_list": [ + { + "name": "chess", + "type": "product" + }, + { + "name": "go", + "type": "product" + } + ] + }, + { + "sentence": "The difference between the multinomial logit model and numerous other methods , models , algorithms , etc. with the same basic setup ( the perceptron algorithm , support vector machine s , linear discriminant analysis , etc .", + "entity_list": [ + { + "name": "multinomial logit model", + "type": "algorithm" + }, + { + "name": "perceptron algorithm", + "type": "algorithm" + }, + { + "name": "support vector machine", + "type": "algorithm" + }, + { + "name": "linear discriminant analysis", + "type": "algorithm" + } + ] + }, + { + "sentence": "Association for Computational Linguistics , published by", + "entity_list": [ + { + "name": "Association for Computational Linguistics", + "type": "conference" + } + ] + }, + { + "sentence": "In computerised Facial recognition system , each face is represented by a large number of pixel values .", + "entity_list": [ + { + "name": "Facial recognition system", + "type": "product" + } + ] + }, + { + "sentence": "In 2002 , his son , Daniel Pearl , a journalist working for the Wall Street Journal was kidnapped and murdered in Pakistan , leading Judea and the other members of the family and friends to create the Daniel Pearl Foundation .", + "entity_list": [ + { + "name": "Daniel Pearl", + "type": "person" + }, + { + "name": "Wall Street Journal", + "type": "organization" + }, + { + "name": "Pakistan", + "type": "country" + }, + { + "name": "Judea", + "type": "person" + }, + { + "name": "Daniel Pearl Foundation", + "type": "organization" + } + ] + }, + { + "sentence": "As of late 2006 , Red Envelope Entertainment also expanded into producing original content with filmmakers such as John Waters .", + "entity_list": [ + { + "name": "Red Envelope Entertainment", + "type": "organization" + }, + { + "name": "John Waters", + "type": "person" + } + ] + }, + { + "sentence": "The building is now part of the Beth Israel Deaconess Medical Center .", + "entity_list": [ + { + "name": "Beth Israel Deaconess Medical Center", + "type": "organization" + } + ] + }, + { + "sentence": "A common theme of this work is the adoption of a sign-theoretic perspective on issues of artificial intelligence and knowledge representation .", + "entity_list": [ + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "knowledge representation", + "type": "task" + } + ] + }, + { + "sentence": "For instance , the term neural machine translation ( NMT ) emphasizes the fact that deep learning-based approaches to machine translation directly learn sequence-to-sequence transformations , obviating the need for intermediate steps such as word alignment and language modeling that was used in statistical machine translation ( SMT ) .", + "entity_list": [ + { + "name": "neural machine translation", + "type": "task" + }, + { + "name": "NMT", + "type": "task" + }, + { + "name": "machine translation", + "type": "task" + }, + { + "name": "word alignment", + "type": "task" + }, + { + "name": "language modeling", + "type": "task" + }, + { + "name": "statistical machine translation", + "type": "task" + }, + { + "name": "SMT", + "type": "task" + } + ] + }, + { + "sentence": "Most research in the field of WSD is performed by using WordNet as a reference sense inventory for .", + "entity_list": [ + { + "name": "WSD", + "type": "field" + }, + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "Notable former PhD students and postdoctoral researchers from his group include Richard Zemel , and Zoubin Ghahramani .", + "entity_list": [ + { + "name": "PhD", + "type": "else" + }, + { + "name": "Richard Zemel", + "type": "researcher" + }, + { + "name": "Zoubin Ghahramani", + "type": "researcher" + } + ] + }, + { + "sentence": "Each prediction result or instance of a confusion matrix represents one point in the ROC space .", + "entity_list": [ + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "ROC", + "type": "metrics" + } + ] + }, + { + "sentence": "In 1997 Thrun and his colleagues Wolfram Burgard and Dieter Fox developed the world 's first robotic tour guide in the Deutsches Museum Bonn ( 1997 ) .", + "entity_list": [ + { + "name": "Thrun", + "type": "researcher" + }, + { + "name": "Wolfram Burgard", + "type": "researcher" + }, + { + "name": "Dieter Fox", + "type": "researcher" + }, + { + "name": "robotic tour guide", + "type": "product" + }, + { + "name": "Deutsches Museum Bonn", + "type": "location" + } + ] + }, + { + "sentence": "WordNet is a lexical database of semantic relation s between word s in more than 200 languages. its primary use is in automatic natural language processing and artificial intelligence applications .", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + }, + { + "name": "semantic relation", + "type": "else" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "Conferences in the field of natural language processing , such as Association for Computational Linguistics , North American Chapter of the Association for Computational Linguistics , EMNLP , and HLT , are beginning to include papers on speech processing .", + "entity_list": [ + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "Association for Computational Linguistics", + "type": "conference" + }, + { + "name": "North American Chapter of the Association for Computational Linguistics", + "type": "conference" + }, + { + "name": "EMNLP", + "type": "conference" + }, + { + "name": "HLT", + "type": "conference" + }, + { + "name": "speech processing", + "type": "field" + } + ] + }, + { + "sentence": "A set of Java programs use the lexicon to work through the variations in biomedical texts by relating words by their parts of speech , which can be helpful in web searches or searches through an electronic medical record .", + "entity_list": [ + { + "name": "Java", + "type": "program language" + }, + { + "name": "parts of speech", + "type": "else" + }, + { + "name": "electronic medical record", + "type": "else" + } + ] + }, + { + "sentence": "There are many more recent algorithms such as LPBoost , TotalBoost , BrownBoost , xgboost , MadaBoost , , and others .", + "entity_list": [ + { + "name": "LPBoost", + "type": "algorithm" + }, + { + "name": "TotalBoost", + "type": "algorithm" + }, + { + "name": "BrownBoost", + "type": "algorithm" + }, + { + "name": "xgboost", + "type": "algorithm" + }, + { + "name": "MadaBoost", + "type": "algorithm" + } + ] + }, + { + "sentence": "This is an example implementation in Python :", + "entity_list": [ + { + "name": "Python", + "type": "program language" + } + ] + }, + { + "sentence": "The Mattel Intellivision game console offered the Intellivoice Voice Synthesis module in 1982 .", + "entity_list": [ + { + "name": "Mattel", + "type": "product" + }, + { + "name": "Intellivision", + "type": "product" + }, + { + "name": "Intellivoice Voice Synthesis", + "type": "task" + } + ] + }, + { + "sentence": "He also worked on machine translation , both high-accuracy knowledge-based MT and machine learning for Statistical machine translation ( such as generalized example-based MT ) .", + "entity_list": [ + { + "name": "machine translation", + "type": "task" + }, + { + "name": "high-accuracy knowledge-based MT", + "type": "task" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "Statistical machine translation", + "type": "task" + }, + { + "name": "generalized example-based MT", + "type": "task" + } + ] + }, + { + "sentence": "Wolfram Mathematica ( usually termed Mathematica ) is a modern technical computing system spanning most areas of technical - including neural networks , machine learning , image processing , geometry , data science , visualizations , and others .", + "entity_list": [ + { + "name": "Wolfram Mathematica", + "type": "organization" + }, + { + "name": "Mathematica", + "type": "organization" + }, + { + "name": "neural networks", + "type": "algorithm" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "image processing", + "type": "field" + }, + { + "name": "geometry", + "type": "field" + }, + { + "name": "data science", + "type": "field" + }, + { + "name": "visualizations", + "type": "field" + } + ] + }, + { + "sentence": "The first digitally operated and programmable robot was invented by George Devol in 1954 and was ultimately called the Unimate .", + "entity_list": [ + { + "name": "digitally operated and programmable robot", + "type": "product" + }, + { + "name": "George Devol", + "type": "researcher" + }, + { + "name": "Unimate", + "type": "product" + } + ] + }, + { + "sentence": "Like DBNs , DBMs can learn complex and abstract internal representations of the input in tasks such as Object recognition or speech recognition , using limited , labeled data to fine-tune the representations built using a large set of unlabeled sensory input data .", + "entity_list": [ + { + "name": "DBNs", + "type": "algorithm" + }, + { + "name": "DBMs", + "type": "algorithm" + }, + { + "name": "Object recognition", + "type": "task" + }, + { + "name": "speech recognition", + "type": "task" + } + ] + }, + { + "sentence": "Scientific conferences where vision based activity recognition work often appears are ICCV and CVPR .", + "entity_list": [ + { + "name": "vision based activity recognition", + "type": "task" + }, + { + "name": "ICCV", + "type": "conference" + }, + { + "name": "CVPR", + "type": "conference" + } + ] + }, + { + "sentence": "In statistics , an expectation-maximization ( EM ) algorithm is an iterative method to find maximum likelihood or maximum a posteriori ( MAP ) estimates of parameter s in statistical model s , where the model depends on unobserved latent variable s .", + "entity_list": [ + { + "name": "statistics", + "type": "field" + }, + { + "name": "expectation-maximization", + "type": "algorithm" + }, + { + "name": "EM", + "type": "algorithm" + }, + { + "name": "maximum likelihood", + "type": "metrics" + }, + { + "name": "maximum a posteriori", + "type": "metrics" + }, + { + "name": "MAP", + "type": "metrics" + }, + { + "name": "latent variable", + "type": "else" + } + ] + }, + { + "sentence": "Similarly , investigators sometimes report the FALSE Positive Rate ( FPR ) as well as the FALSE Negative Rate ( FNR ) .", + "entity_list": [ + { + "name": "FALSE Positive Rate", + "type": "metrics" + }, + { + "name": "FPR", + "type": "metrics" + }, + { + "name": "FALSE Negative Rate", + "type": "metrics" + }, + { + "name": "FNR", + "type": "metrics" + } + ] + }, + { + "sentence": "The concept is similar to the signal to noise ratio used in the sciences and confusion matrix used in artificial intelligence .", + "entity_list": [ + { + "name": "signal to noise ratio", + "type": "metrics" + }, + { + "name": "sciences", + "type": "field" + }, + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "The Code of Ethics on Human Augmentation , which was originally introduced by Steve Mann in 2004 and refined with Ray Kurzweil and Marvin Minsky in 2013 , was ultimately ratified at the Virtual Reality Toronto conference on June 25 , 2017 .", + "entity_list": [ + { + "name": "Human Augmentation", + "type": "field" + }, + { + "name": "Steve Mann", + "type": "researcher" + }, + { + "name": "Ray Kurzweil", + "type": "researcher" + }, + { + "name": "Marvin Minsky", + "type": "researcher" + }, + { + "name": "Virtual Reality Toronto conference", + "type": "conference" + } + ] + }, + { + "sentence": "In 1913 , Walter R. Booth directed 10 films for the U.K. Kinoplastikon , presumably in collaboration with Cecil Hepworth .", + "entity_list": [ + { + "name": "Walter R. Booth", + "type": "person" + }, + { + "name": "U.K. Kinoplastikon", + "type": "organization" + }, + { + "name": "Cecil Hepworth", + "type": "person" + } + ] + }, + { + "sentence": "They introduced their new robot in 1961 at a trade show at Chicago 's Cow Palace .", + "entity_list": [ + { + "name": "Chicago", + "type": "location" + }, + { + "name": "Cow Palace", + "type": "location" + } + ] + }, + { + "sentence": "While some chatbot applications use extensive word-classification processes , natural language processing processors , and sophisticated Artificial intelligence , others simply scan for general keywords and generate responses using common phrases obtained from an associated library or database .", + "entity_list": [ + { + "name": "chatbot", + "type": "product" + }, + { + "name": "word-classification", + "type": "task" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "Artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "The WaveNet model proposed in 2016 achieves great performance on speech quality .", + "entity_list": [ + { + "name": "WaveNet", + "type": "product" + } + ] + }, + { + "sentence": "Organizations known to use ALE for Emergency management , disaster relief , ordinary communication or extraordinary situation response : American Red Cross , FEMA , Disaster Medical Assistance Team s , NATO , Federal Bureau of Investigation , United Nations , AT & T , Civil Air Patrol , ( ARES ) .", + "entity_list": [ + { + "name": "ALE", + "type": "product" + }, + { + "name": "Emergency management", + "type": "else" + }, + { + "name": "disaster relief", + "type": "else" + }, + { + "name": "ordinary communication", + "type": "else" + }, + { + "name": "extraordinary situation response", + "type": "else" + }, + { + "name": "American Red Cross", + "type": "organization" + }, + { + "name": "FEMA", + "type": "organization" + }, + { + "name": "Disaster Medical Assistance Team", + "type": "organization" + }, + { + "name": "NATO", + "type": "organization" + }, + { + "name": "Federal Bureau of Investigation", + "type": "organization" + }, + { + "name": "United Nations", + "type": "organization" + }, + { + "name": "AT & T", + "type": "organization" + }, + { + "name": "Civil Air Patrol", + "type": "organization" + }, + { + "name": "ARES", + "type": "organization" + } + ] + }, + { + "sentence": "Here , the Kronecker delta is used for simplicity ( cf. the derivative of a sigmoid function , being expressed via the function itself ) .", + "entity_list": [ + { + "name": "Kronecker delta", + "type": "algorithm" + }, + { + "name": "sigmoid function", + "type": "algorithm" + } + ] + }, + { + "sentence": "The theory is based in philosophical foundations , and was founded by Ray Solomonoff around 1960 . Samuel Rathmanner and Marcus Hutter .", + "entity_list": [ + { + "name": "Ray Solomonoff", + "type": "researcher" + }, + { + "name": "Samuel Rathmanner", + "type": "researcher" + }, + { + "name": "Marcus Hutter", + "type": "researcher" + } + ] + }, + { + "sentence": "WordNet , a freely available database originally designed as a semantic network based on psycholinguistic principles , was expanded by addition of definitions and is now also viewed as a dictionary .", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + }, + { + "name": "semantic network", + "type": "else" + }, + { + "name": "psycholinguistic principles", + "type": "else" + } + ] + }, + { + "sentence": "Advances in the field of computational imaging research is presented in several venues including publications of SIGGRAPH and the .", + "entity_list": [ + { + "name": "computational imaging", + "type": "field" + }, + { + "name": "SIGGRAPH", + "type": "conference" + } + ] + }, + { + "sentence": "Classification can be thought of as two separate problems - binary classification and multiclass classification .", + "entity_list": [ + { + "name": "Classification", + "type": "task" + }, + { + "name": "binary classification", + "type": "task" + }, + { + "name": "multiclass classification", + "type": "task" + } + ] + }, + { + "sentence": "Advanced gene finders for both prokaryotic and eukaryotic genomes typically use complex probabilistic model s , such as hidden Markov model s ( HMMs ) to combine information from a variety of different signal and content measurements .", + "entity_list": [ + { + "name": "probabilistic model", + "type": "algorithm" + }, + { + "name": "hidden Markov model", + "type": "algorithm" + }, + { + "name": "HMMs", + "type": "algorithm" + } + ] + }, + { + "sentence": "Neuroevolution , or neuro-evolution , is a form of artificial intelligence that uses evolutionary algorithm s to generate artificial neural network s ( ANN ) , parameters , topology and rules. and evolutionary robotics .", + "entity_list": [ + { + "name": "Neuroevolution", + "type": "else" + }, + { + "name": "neuro-evolution", + "type": "else" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "evolutionary algorithm", + "type": "algorithm" + }, + { + "name": "artificial neural network", + "type": "algorithm" + }, + { + "name": "ANN", + "type": "algorithm" + }, + { + "name": "evolutionary robotics", + "type": "algorithm" + } + ] + }, + { + "sentence": "Since IBM proposed and realized the system of BLEU Papineni et al .", + "entity_list": [ + { + "name": "IBM", + "type": "organization" + }, + { + "name": "BLEU", + "type": "metrics" + }, + { + "name": "Papineni", + "type": "researcher" + } + ] + }, + { + "sentence": "In 2009 , experts attended a conference hosted by the Association for the Advancement of Artificial Intelligence ( AAAI ) to discuss whether computers and robots might be able to acquire any autonomy , and how much these abilities might pose a threat or hazard .", + "entity_list": [ + { + "name": "Association for the Advancement of Artificial Intelligence", + "type": "conference" + }, + { + "name": "AAAI", + "type": "conference" + } + ] + }, + { + "sentence": "After boosting , a classifier constructed from 200 features could yield a 95 % detection rate under a ^ { -5 } / math FALSE positive rate .P. Viola , M. Jones , Robust Real-time Object Detection , 2001 .", + "entity_list": [ + { + "name": "FALSE positive rate", + "type": "metrics" + }, + { + "name": ".P. Viola", + "type": "researcher" + }, + { + "name": "M. Jones", + "type": "researcher" + }, + { + "name": "Robust Real-time Object Detection", + "type": "task" + } + ] + }, + { + "sentence": "The website was originally Perl -based , but IMDb no longer discloses what software it uses for reasons of security .", + "entity_list": [ + { + "name": "Perl", + "type": "program language" + }, + { + "name": "IMDb", + "type": "organization" + } + ] + }, + { + "sentence": "The start-up was founded by Demis Hassabis , Shane Legg and Mustafa Suleyman in 2010 .", + "entity_list": [ + { + "name": "Demis Hassabis", + "type": "researcher" + }, + { + "name": "Shane Legg", + "type": "researcher" + }, + { + "name": "Mustafa Suleyman", + "type": "person" + } + ] + }, + { + "sentence": "Two very commonly used loss functions are the mean squared error , mathL ( a ) = a ^ 2 / math , and the absolute loss , mathL ( a ) = | a | / math .", + "entity_list": [ + { + "name": "loss functions", + "type": "else" + }, + { + "name": "mean squared error", + "type": "metrics" + }, + { + "name": "absolute loss", + "type": "metrics" + } + ] + }, + { + "sentence": "The soft-margin support vector machine described above is an example of an empirical risk minimization ( ERM ) for the hinge loss .", + "entity_list": [ + { + "name": "support vector machine", + "type": "algorithm" + }, + { + "name": "empirical risk minimization", + "type": "algorithm" + }, + { + "name": "ERM", + "type": "algorithm" + }, + { + "name": "hinge loss", + "type": "metrics" + } + ] + }, + { + "sentence": "A deep learning based approach to MT , neural machine translation has made rapid progress in recent years , and Google has announced its translation services are now using this technology in preference to its previous statistical methods .", + "entity_list": [ + { + "name": "deep learning", + "type": "field" + }, + { + "name": "MT", + "type": "task" + }, + { + "name": "neural machine translation", + "type": "task" + }, + { + "name": "Google", + "type": "organization" + } + ] + }, + { + "sentence": "This tends to yield very large performance gains when working with large corpora such as WordNet .", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "Face detection is used in biometrics , often as a part of ( or together with ) a facial recognition system .", + "entity_list": [ + { + "name": "Face detection", + "type": "task" + }, + { + "name": "biometrics", + "type": "field" + }, + { + "name": "facial recognition system", + "type": "product" + } + ] + }, + { + "sentence": "trained by maximum likelihood estimation .", + "entity_list": [ + { + "name": "maximum likelihood estimation", + "type": "algorithm" + } + ] + }, + { + "sentence": ", Ltd. in Thailand ; Komatsu ( Shanghai ) Ltd. in 1996 in Shanghai , China ; Industrial Power Alliance Ltd. in Japan , a joint venture with Cummins , in 1998 ; L & T-Komatsu Limited in India in 1998 ( shares sold in 2013 ) ; and Komatsu Brasil International Ltda. in Brazil in 1998 .", + "entity_list": [ + { + "name": "Thailand", + "type": "country" + }, + { + "name": "Komatsu ( Shanghai ) Ltd.", + "type": "organization" + }, + { + "name": "Shanghai", + "type": "location" + }, + { + "name": "China", + "type": "country" + }, + { + "name": "Industrial Power Alliance Ltd.", + "type": "organization" + }, + { + "name": "Japan", + "type": "country" + }, + { + "name": "Cummins", + "type": "organization" + }, + { + "name": "L & T-Komatsu Limited", + "type": "organization" + }, + { + "name": "India", + "type": "country" + }, + { + "name": "Komatsu Brasil International Ltda.", + "type": "organization" + }, + { + "name": "Brazil", + "type": "country" + } + ] + }, + { + "sentence": "dgp also occasionally hosts artists in residence ( e.g. , Oscar -winner Chris Landreth .", + "entity_list": [ + { + "name": "dgp", + "type": "organization" + }, + { + "name": "artists in residence", + "type": "else" + }, + { + "name": "Oscar", + "type": "else" + }, + { + "name": "Chris Landreth", + "type": "person" + } + ] + }, + { + "sentence": "It currently includes four sub-competitions - the RoboMaster Robotics Competition , the RoboMaster Technical Challenge , the ICRA RoboMaster AI Challenge , and the new RoboMaster Youth Tournament .", + "entity_list": [ + { + "name": "RoboMaster Robotics Competition", + "type": "else" + }, + { + "name": "RoboMaster Technical Challenge", + "type": "else" + }, + { + "name": "ICRA RoboMaster AI Challenge", + "type": "else" + }, + { + "name": "RoboMaster Youth Tournament", + "type": "else" + } + ] + }, + { + "sentence": "By the early 2000s , the dominant speech processing strategy started to shift away from Hidden Markov model towards more modern neural networks and deep learning .", + "entity_list": [ + { + "name": "speech processing", + "type": "field" + }, + { + "name": "Hidden Markov model", + "type": "algorithm" + }, + { + "name": "neural networks", + "type": "algorithm" + }, + { + "name": "deep learning", + "type": "field" + } + ] + }, + { + "sentence": "Another equivalent expression , in the case of a binary target rate , is that the TRUE positive rate and the FALSE positive rate are equal ( and therefore the FALSE negative rate and the TRUE negative rate are equal ) for every value of the sensitive characteristics :", + "entity_list": [ + { + "name": "binary target rate", + "type": "metrics" + }, + { + "name": "TRUE positive rate", + "type": "metrics" + }, + { + "name": "FALSE positive rate", + "type": "metrics" + }, + { + "name": "FALSE negative rate", + "type": "metrics" + }, + { + "name": "TRUE negative rate", + "type": "metrics" + } + ] + }, + { + "sentence": "The MATLAB function ,", + "entity_list": [ + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "An articulated robot is a robot with rotary joint s ( e.g. a legged robot or an industrial robot ) .", + "entity_list": [ + { + "name": "articulated robot", + "type": "product" + }, + { + "name": "rotary joint", + "type": "else" + }, + { + "name": "industrial robot", + "type": "product" + } + ] + }, + { + "sentence": "Pandora ( also known as Pandora Media or Pandora Radio ) is an American music streaming and automated Recommender system internet radio service powered by the Music Genome Project and headquartered in Oakland , California .", + "entity_list": [ + { + "name": "Pandora", + "type": "product" + }, + { + "name": "Pandora Media", + "type": "product" + }, + { + "name": "Pandora Radio", + "type": "product" + }, + { + "name": "American", + "type": "else" + }, + { + "name": "automated Recommender system", + "type": "product" + }, + { + "name": "Music Genome Project", + "type": "else" + }, + { + "name": "Oakland", + "type": "location" + }, + { + "name": "California", + "type": "location" + } + ] + }, + { + "sentence": "She is a board member of the International Machine Learning Society , has been a member of AAAI Executive council , was PC co-chair of ICML 2011 , and has served as senior PC member for conferences including AAAI , ICML , IJCAI , ISWC , KDD , SIGMOD , UAI , VLDB , WSDM and WWW .", + "entity_list": [ + { + "name": "International Machine Learning Society", + "type": "organization" + }, + { + "name": "AAAI Executive council", + "type": "organization" + }, + { + "name": "ICML 2011", + "type": "conference" + }, + { + "name": "AAAI", + "type": "conference" + }, + { + "name": "ICML", + "type": "conference" + }, + { + "name": "IJCAI", + "type": "conference" + }, + { + "name": "ISWC", + "type": "conference" + }, + { + "name": "KDD", + "type": "conference" + }, + { + "name": "SIGMOD", + "type": "conference" + }, + { + "name": "UAI", + "type": "conference" + }, + { + "name": "VLDB", + "type": "conference" + }, + { + "name": "WSDM", + "type": "conference" + }, + { + "name": "WWW", + "type": "conference" + } + ] + }, + { + "sentence": "James S. Albus of the National Institute of Standards and Technology ( NIST ) developed the Robocrane , where the platform hangs from six cables instead of being supported by six jacks .", + "entity_list": [ + { + "name": "James S. Albus", + "type": "researcher" + }, + { + "name": "National Institute of Standards and Technology", + "type": "organization" + }, + { + "name": "NIST", + "type": "organization" + }, + { + "name": "Robocrane", + "type": "product" + } + ] + }, + { + "sentence": "Another class of direct search algorithms are the various evolutionary algorithm s , e.g. genetic algorithm s .", + "entity_list": [ + { + "name": "direct search algorithms", + "type": "else" + }, + { + "name": "evolutionary algorithm", + "type": "algorithm" + }, + { + "name": "genetic algorithm", + "type": "algorithm" + } + ] + }, + { + "sentence": "KUKA is a German manufacturer of industrial robot s and solution s for factory automation .", + "entity_list": [ + { + "name": "KUKA", + "type": "organization" + }, + { + "name": "German", + "type": "else" + }, + { + "name": "industrial robot", + "type": "product" + } + ] + }, + { + "sentence": "Other films between 2016 to 2020 that captured with IMAX camera 's were Zack Snyder ' s Batman v Superman : Dawn of Justice , Clint Eastwood ' s Sully , Damien Chazelle ' s First Man , Patty Jenkins ' Wonder Woman 1984 , Cary Joji Fukunaga ' s No Time to Die and Joseph Kosinski ' s Top Gun : Maverick .", + "entity_list": [ + { + "name": "IMAX", + "type": "else" + }, + { + "name": "Zack Snyder", + "type": "person" + }, + { + "name": "Batman v Superman : Dawn of Justice", + "type": "else" + }, + { + "name": "Clint Eastwood", + "type": "person" + }, + { + "name": "Sully", + "type": "else" + }, + { + "name": "Damien Chazelle", + "type": "person" + }, + { + "name": "First Man", + "type": "else" + }, + { + "name": "Patty Jenkins", + "type": "person" + }, + { + "name": "Wonder Woman 1984", + "type": "else" + }, + { + "name": "Cary Joji Fukunaga", + "type": "person" + }, + { + "name": "No Time to Die", + "type": "else" + }, + { + "name": "Joseph Kosinski", + "type": "person" + }, + { + "name": "Top Gun : Maverick", + "type": "else" + } + ] + }, + { + "sentence": "The trial of MICR E13B font was shown to the American Bankers Association ( ABA ) in July 1956 , which adopted it in 1958 as the MICR standard for negotiable document s in the United States .", + "entity_list": [ + { + "name": "MICR E13B", + "type": "else" + }, + { + "name": "American Bankers Association", + "type": "organization" + }, + { + "name": "ABA", + "type": "organization" + }, + { + "name": "MICR", + "type": "else" + }, + { + "name": "United States", + "type": "country" + } + ] + }, + { + "sentence": "Local search algorithms are widely applied to numerous hard computational problems , including problems from computer science ( particularly artificial intelligence ) , mathematics , operations research , engineering , and bioinformatics .", + "entity_list": [ + { + "name": "Local search algorithms", + "type": "else" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "mathematics", + "type": "field" + }, + { + "name": "operations research", + "type": "field" + }, + { + "name": "engineering", + "type": "field" + }, + { + "name": "bioinformatics", + "type": "field" + } + ] + }, + { + "sentence": "Gerd Gigerenzer ( born September 3 , 1947 , Wallersdorf , Germany ) is a Germany psychologist who has studied the use of bounded rationality and heuristic s in decision making .", + "entity_list": [ + { + "name": "Gerd Gigerenzer", + "type": "researcher" + }, + { + "name": "Wallersdorf", + "type": "location" + }, + { + "name": "Germany", + "type": "country" + }, + { + "name": "bounded rationality", + "type": "algorithm" + }, + { + "name": "heuristic", + "type": "algorithm" + }, + { + "name": "decision making", + "type": "task" + } + ] + }, + { + "sentence": "to minimize the Mean squared error .", + "entity_list": [ + { + "name": "Mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "But even an official language with a regulating academy , such as Standard French with the Académie française , is classified as a natural language ( for example , in the field of natural language processing ) , as its prescriptive points do not make it either constructed enough to be classified as a constructed language or controlled enough to be classified as a controlled natural language .", + "entity_list": [ + { + "name": "Standard French", + "type": "else" + }, + { + "name": "Académie française", + "type": "organization" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "constructed language", + "type": "else" + }, + { + "name": "controlled natural language", + "type": "else" + } + ] + }, + { + "sentence": "There are a number of other metrics , most simply the accuracy or Fraction Correct ( FC ) , which measures the fraction of all instances that are correctly categorized ; the complement is the Fraction Incorrect ( FiC ) .", + "entity_list": [ + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "Fraction Correct", + "type": "metrics" + }, + { + "name": "FC", + "type": "metrics" + }, + { + "name": "Fraction Incorrect", + "type": "metrics" + }, + { + "name": "FiC", + "type": "metrics" + } + ] + }, + { + "sentence": "Cardie became a Fellow of the Association for Computational Linguistics in 2016 .", + "entity_list": [ + { + "name": "Cardie", + "type": "researcher" + }, + { + "name": "Association for Computational Linguistics", + "type": "conference" + } + ] + }, + { + "sentence": "Learning the parameters math \\ theta / math is usually done by maximum likelihood learning for mathp ( Y _ i | X _ i ; \\ theta ) / math .", + "entity_list": [ + { + "name": "maximum likelihood learning", + "type": "algorithm" + } + ] + }, + { + "sentence": "Cluster analysis , and Non-negative matrix factorization for descriptive mining .", + "entity_list": [ + { + "name": "Cluster analysis", + "type": "task" + }, + { + "name": "Non-negative matrix factorization", + "type": "algorithm" + }, + { + "name": "descriptive mining", + "type": "task" + } + ] + }, + { + "sentence": "In computer science and the information technology that it enables , it has been a long-term challenge to the ability in computers to do natural language processing and machine learning .", + "entity_list": [ + { + "name": "computer science", + "type": "field" + }, + { + "name": "information technology", + "type": "field" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + } + ] + }, + { + "sentence": "( Code for Gabor feature extraction from images in MATLAB can be found at", + "entity_list": [ + { + "name": "Gabor feature extraction", + "type": "algorithm" + }, + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "The NeuralExpert centers the design specifications around the type of problem the user would like the neural network to solve ( Classification , Prediction , Function approximation or Cluster analysis ) .", + "entity_list": [ + { + "name": "NeuralExpert", + "type": "else" + }, + { + "name": "neural network", + "type": "algorithm" + }, + { + "name": "Classification", + "type": "task" + }, + { + "name": "Prediction", + "type": "task" + }, + { + "name": "Function approximation", + "type": "task" + }, + { + "name": "Cluster analysis", + "type": "task" + } + ] + }, + { + "sentence": "When the quantization step size ( Δ ) is small relative to the variation in the signal being quantized , it is relatively simple to show that the mean squared error produced by such a rounding operation will be approximately math \\ Delta ^ 2 / 12 / math.math", + "entity_list": [ + { + "name": "quantization step size", + "type": "else" + }, + { + "name": "mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "The construction of a rich lexicon with a suitable ontology requires significant effort , e.g. , Wordnet lexicon required many person-years of effort. G. A. Miller , R. Beckwith , C. D. Fellbaum , D. Gross , K. Miller .", + "entity_list": [ + { + "name": "Wordnet", + "type": "product" + }, + { + "name": "G. A. Miller", + "type": "researcher" + }, + { + "name": "R. Beckwith", + "type": "researcher" + }, + { + "name": "C. D. Fellbaum", + "type": "researcher" + }, + { + "name": "D. Gross", + "type": "researcher" + }, + { + "name": "K. Miller", + "type": "researcher" + } + ] + }, + { + "sentence": "Kawasaki 's portfolio also includes retractable roofs , floors and other giant structures , the Sapporo Dome ' retractable surface is one example .", + "entity_list": [ + { + "name": "Kawasaki", + "type": "organization" + }, + { + "name": "Sapporo Dome", + "type": "location" + } + ] + }, + { + "sentence": "Kappa statistics such as Fleiss ' kappa and Cohen 's kappa are methods for calculating inter-rater reliability based on different assumptions about the marginal or prior distributions , and are increasingly used as chance corrected alternatives to accuracy in other contexts .", + "entity_list": [ + { + "name": "Kappa statistics", + "type": "metrics" + }, + { + "name": "Fleiss ' kappa", + "type": "metrics" + }, + { + "name": "Cohen 's kappa", + "type": "metrics" + }, + { + "name": "inter-rater reliability", + "type": "metrics" + }, + { + "name": "accuracy", + "type": "metrics" + } + ] + }, + { + "sentence": "With his students Sepp Hochreiter , Felix Gers , Fred Cummins , Alex Graves , and others , Schmidhuber published increasingly sophisticated versions of a type of recurrent neural network called the long short-term memory ( LSTM ) .", + "entity_list": [ + { + "name": "Sepp Hochreiter", + "type": "researcher" + }, + { + "name": "Felix Gers", + "type": "researcher" + }, + { + "name": "Fred Cummins", + "type": "researcher" + }, + { + "name": "Alex Graves", + "type": "researcher" + }, + { + "name": "Schmidhuber", + "type": "researcher" + }, + { + "name": "recurrent neural network", + "type": "algorithm" + }, + { + "name": "long short-term memory", + "type": "algorithm" + }, + { + "name": "LSTM", + "type": "algorithm" + } + ] + }, + { + "sentence": "2004 - The first Cobot KUKA LBR 3 is released .", + "entity_list": [ + { + "name": "Cobot KUKA LBR 3", + "type": "product" + } + ] + }, + { + "sentence": "Two shallow approaches used to train and then disambiguate are Naive Bayes classifier and decision trees .", + "entity_list": [ + { + "name": "Naive Bayes classifier", + "type": "algorithm" + }, + { + "name": "decision trees", + "type": "algorithm" + } + ] + }, + { + "sentence": "The first practical forms of photography were introduced in January 1839 by Louis Daguerre and Henry Fox Talbot .", + "entity_list": [ + { + "name": "photography", + "type": "else" + }, + { + "name": "Louis Daguerre", + "type": "person" + }, + { + "name": "Henry Fox Talbot", + "type": "person" + } + ] + }, + { + "sentence": "For example , speech synthesis , combined with speech recognition , allows for interaction with mobile devices via language processing interfaces .", + "entity_list": [ + { + "name": "speech synthesis", + "type": "task" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "language processing", + "type": "field" + } + ] + }, + { + "sentence": "Phidgets can be programmed using a variety of software and programming languages , ranging from Java to Microsoft Excel .", + "entity_list": [ + { + "name": "Phidgets", + "type": "product" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "Microsoft Excel", + "type": "product" + } + ] + }, + { + "sentence": "The term machine learning was coined in 1959 by Arthur Samuel , an American IBMer and pioneer in the field of computer gaming and artificial intelligence .", + "entity_list": [ + { + "name": "machine learning", + "type": "field" + }, + { + "name": "Arthur Samuel", + "type": "researcher" + }, + { + "name": "American IBMer", + "type": "else" + }, + { + "name": "computer gaming", + "type": "field" + }, + { + "name": "artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "The Israeli poet David Avidan , who was fascinated with future technologies and their relation to art , desired to explore the use of computers for writing literature .", + "entity_list": [ + { + "name": "Israeli", + "type": "else" + }, + { + "name": "David Avidan", + "type": "person" + } + ] + }, + { + "sentence": "As part of the GATEway Project in 2017 , Oxbotica trialled seven autonomous shuttle buses in Greenwich , navigating a two-mile riverside path near London 's The O2 Arena on a route also used by pedestrians and cyclists .", + "entity_list": [ + { + "name": "GATEway Project", + "type": "else" + }, + { + "name": "Oxbotica", + "type": "organization" + }, + { + "name": "Greenwich", + "type": "location" + }, + { + "name": "London", + "type": "location" + }, + { + "name": "The O2 Arena", + "type": "location" + } + ] + }, + { + "sentence": "An unrelated but commonly used combination of basic statistics from information retrieval is the F-score , being a ( possibly weighted ) harmonic mean of recall and precision where recall = sensitivity = TRUE positive rate , but specificity and precision are totally different measures .", + "entity_list": [ + { + "name": "information retrieval", + "type": "task" + }, + { + "name": "F-score", + "type": "metrics" + }, + { + "name": "harmonic mean", + "type": "else" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "sensitivity", + "type": "metrics" + }, + { + "name": "TRUE positive rate", + "type": "metrics" + }, + { + "name": "specificity", + "type": "metrics" + } + ] + }, + { + "sentence": "Neuromorphic engineering is an interdisciplinary subject that takes inspiration from biology , physics , mathematics , computer science , and electronic engineering to design artificial neural systems , such as vision systems , head-eye systems , auditory processors , and autonomous robots , whose physical architecture and design principles are based on those of biological nervous systems .", + "entity_list": [ + { + "name": "Neuromorphic engineering", + "type": "field" + }, + { + "name": "biology", + "type": "field" + }, + { + "name": "physics", + "type": "field" + }, + { + "name": "mathematics", + "type": "field" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "electronic engineering", + "type": "field" + }, + { + "name": "vision systems", + "type": "product" + }, + { + "name": "head-eye systems", + "type": "product" + }, + { + "name": "auditory processors", + "type": "product" + }, + { + "name": "autonomous robots", + "type": "product" + }, + { + "name": "biological nervous systems", + "type": "product" + } + ] + }, + { + "sentence": "To be specific , the BIBO stability criterion requires that the ROC of the system includes the unit circle .", + "entity_list": [ + { + "name": "BIBO stability criterion", + "type": "metrics" + }, + { + "name": "ROC", + "type": "metrics" + } + ] + }, + { + "sentence": "2 The program was rewritten in Java beginning in 1998 .", + "entity_list": [ + { + "name": "Java", + "type": "program language" + } + ] + }, + { + "sentence": "The MCC can be calculated directly from the confusion matrix using the formula :", + "entity_list": [ + { + "name": "MCC", + "type": "metrics" + }, + { + "name": "confusion matrix", + "type": "metrics" + } + ] + }, + { + "sentence": "It was developed by a team at the MIT-IBM Watson AI Lab and first presented at the 2018 International Conference on Learning Representations .", + "entity_list": [ + { + "name": "MIT-IBM Watson AI Lab", + "type": "organization" + }, + { + "name": "2018 International Conference on Learning Representations", + "type": "conference" + } + ] + }, + { + "sentence": "When the TRUE prevalence s for the two positive variables are equal as assumed in Fleiss kappa and F-score , that is the number of positive predictions matches the number of positive classes in the dichotomous ( two class ) case , the different kappa and correlation measure collapse to identity with Youden 's J , and recall , precision and F-score are similarly identical with accuracy .", + "entity_list": [ + { + "name": "TRUE prevalence", + "type": "metrics" + }, + { + "name": "Fleiss kappa", + "type": "metrics" + }, + { + "name": "F-score", + "type": "metrics" + }, + { + "name": "kappa", + "type": "metrics" + }, + { + "name": "correlation", + "type": "metrics" + }, + { + "name": "Youden 's J", + "type": "researcher" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "accuracy", + "type": "metrics" + } + ] + }, + { + "sentence": "The Building Educational Applications workshop ( BEA ) at NAACL 2013 hosted the inaugural NLI shared task. Tetreault et al , 2013 The competition resulted in 29 entries from teams across the globe , 24 of which also published a paper describing their systems and approaches .", + "entity_list": [ + { + "name": "Building Educational Applications workshop", + "type": "conference" + }, + { + "name": "BEA", + "type": "conference" + }, + { + "name": "NAACL", + "type": "conference" + }, + { + "name": "NLI shared task.", + "type": "task" + }, + { + "name": "Tetreault", + "type": "researcher" + } + ] + }, + { + "sentence": "The Viterbi algorithm is a dynamic programming algorithm for finding the most likely sequence of hidden states called the Viterbi path that results in a sequence of observed events , especially in the context of Markov information source s and hidden Markov model s ( HMM ) .", + "entity_list": [ + { + "name": "Viterbi algorithm", + "type": "algorithm" + }, + { + "name": "dynamic programming algorithm", + "type": "algorithm" + }, + { + "name": "hidden states", + "type": "else" + }, + { + "name": "Viterbi path", + "type": "else" + }, + { + "name": "Markov information source", + "type": "else" + }, + { + "name": "hidden Markov model", + "type": "algorithm" + }, + { + "name": "HMM", + "type": "algorithm" + } + ] + }, + { + "sentence": "In statistics , multinomial logistic regression is a classification method that generalizes logistic regression to multiclass classification , i.e. with more than two possible discrete outcomes .", + "entity_list": [ + { + "name": "statistics", + "type": "field" + }, + { + "name": "multinomial logistic regression", + "type": "algorithm" + }, + { + "name": "classification method", + "type": "else" + }, + { + "name": "logistic regression", + "type": "algorithm" + }, + { + "name": "multiclass classification", + "type": "task" + } + ] + }, + { + "sentence": "Hidden Markov models are known for their applications to reinforcement learning and temporal pattern recognition such as speech , handwriting recognition , gesture recognition , Thad Starner , Alex Pentland .", + "entity_list": [ + { + "name": "Hidden Markov models", + "type": "algorithm" + }, + { + "name": "reinforcement learning", + "type": "field" + }, + { + "name": "temporal pattern recognition", + "type": "field" + }, + { + "name": "speech", + "type": "task" + }, + { + "name": "handwriting recognition", + "type": "task" + }, + { + "name": "gesture recognition", + "type": "task" + }, + { + "name": "Thad Starner", + "type": "researcher" + }, + { + "name": "Alex Pentland", + "type": "researcher" + } + ] + }, + { + "sentence": "Essentially , this means that if the n-gram has been seen more than k times in training , the conditional probability of a word given its history is proportional to the maximum likelihood estimate of that n -gram .", + "entity_list": [ + { + "name": "n-gram", + "type": "else" + }, + { + "name": "maximum likelihood estimate", + "type": "metrics" + }, + { + "name": "n -gram", + "type": "else" + } + ] + }, + { + "sentence": "He is interested in knowledge representation , commonsense reasoning , and natural language understanding , believing that deep language understanding can only currently be achieved by significant hand-engineering of semantically-rich formalisms coupled with statistical preferences .", + "entity_list": [ + { + "name": "knowledge representation", + "type": "task" + }, + { + "name": "commonsense reasoning", + "type": "task" + }, + { + "name": "natural language understanding", + "type": "task" + }, + { + "name": "deep language understanding", + "type": "task" + }, + { + "name": "hand-engineering", + "type": "else" + } + ] + }, + { + "sentence": "In JavaScript , Python or", + "entity_list": [ + { + "name": "JavaScript", + "type": "program language" + }, + { + "name": "Python", + "type": "program language" + } + ] + }, + { + "sentence": "The Newcomb Awards are announced in the AI Magazine published by AAAI .", + "entity_list": [ + { + "name": "Newcomb Awards", + "type": "else" + }, + { + "name": "AI Magazine", + "type": "else" + }, + { + "name": "AAAI", + "type": "conference" + } + ] + }, + { + "sentence": "The Mean squared error on a test set of 100 exemplars is 0.084 , smaller than the unnormalized error .", + "entity_list": [ + { + "name": "Mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "The F-score has been widely used in the natural language processing literature , such as the evaluation of named entity recognition ( NER ) and word segmentation .", + "entity_list": [ + { + "name": "F-score", + "type": "metrics" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "named entity recognition", + "type": "task" + }, + { + "name": "NER", + "type": "task" + }, + { + "name": "word segmentation", + "type": "task" + } + ] + }, + { + "sentence": "Chatbots are typically used in dialog systems for various purposes including customer service , request routing , or for information gathering .", + "entity_list": [ + { + "name": "Chatbots", + "type": "product" + }, + { + "name": "dialog systems", + "type": "product" + }, + { + "name": "request routing", + "type": "else" + }, + { + "name": "information gathering", + "type": "else" + } + ] + }, + { + "sentence": "Important journals include the IEEE Transactions on Speech and Audio Processing ( later renamed IEEE Transactions on Audio , Speech and Language Processing and since Sept 2014 renamed IEEE / ACM Transactions on Audio , Speech and Language Processing - after merging with an ACM publication ) , Computer Speech and Language , and Speech Communication .", + "entity_list": [ + { + "name": "IEEE Transactions on Speech and Audio Processing", + "type": "conference" + }, + { + "name": "IEEE Transactions on Audio , Speech and Language Processing", + "type": "conference" + }, + { + "name": "IEEE / ACM Transactions on Audio , Speech and Language Processing", + "type": "conference" + }, + { + "name": "ACM", + "type": "conference" + }, + { + "name": "Computer Speech and Language", + "type": "conference" + }, + { + "name": "Speech Communication", + "type": "conference" + } + ] + }, + { + "sentence": "EM is frequently used for data clustering in machine learning and computer vision .", + "entity_list": [ + { + "name": "EM", + "type": "algorithm" + }, + { + "name": "data clustering", + "type": "task" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + } + ] + }, + { + "sentence": "While there is no perfect way of describing the confusion matrix of TRUE and FALSE positives and negatives by a single number , the Matthews correlation coefficient is generally regarded as being one of the best such measures .", + "entity_list": [ + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "Matthews correlation coefficient", + "type": "metrics" + } + ] + }, + { + "sentence": "As data set s have grown in size and complexity , direct hands-on data analysis has been augmented with indirect , automated data processing , aided by other discoveries in computer science , specially in the field of machine learning , such as neural networks , cluster analysis , genetic algorithms ( 1950s ) , decision tree learning and decision rules ( 1960s ) , and support vector machines ( 1990s ) .", + "entity_list": [ + { + "name": "data analysis", + "type": "field" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "neural networks", + "type": "algorithm" + }, + { + "name": "cluster analysis", + "type": "task" + }, + { + "name": "genetic algorithms", + "type": "algorithm" + }, + { + "name": "decision tree learning", + "type": "algorithm" + }, + { + "name": "decision rules", + "type": "algorithm" + }, + { + "name": "support vector machines", + "type": "algorithm" + } + ] + }, + { + "sentence": "In the fall of 2005 , Thrun published a textbook entitled Probabilistic Robotics together with his long-term co-workers Dieter Fox and Wolfram Burgard .", + "entity_list": [ + { + "name": "Thrun", + "type": "researcher" + }, + { + "name": "Probabilistic Robotics", + "type": "else" + }, + { + "name": "Dieter Fox", + "type": "researcher" + }, + { + "name": "Wolfram Burgard", + "type": "researcher" + } + ] + }, + { + "sentence": "John D. Lafferty , Andrew McCallum and Pereiramath as follows :", + "entity_list": [ + { + "name": "John D. Lafferty", + "type": "researcher" + }, + { + "name": "Andrew McCallum", + "type": "researcher" + }, + { + "name": "Pereiramath", + "type": "researcher" + } + ] + }, + { + "sentence": "Question answering ( QA ) is a computer science discipline within the fields of information retrieval and natural language processing ( NLP ) , which is concerned with building systems that automatically answer questions posed by humans in a natural language .", + "entity_list": [ + { + "name": "Question answering", + "type": "task" + }, + { + "name": "QA", + "type": "task" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "information retrieval", + "type": "field" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "NLP", + "type": "field" + } + ] + }, + { + "sentence": "However , in the version of the metric used by NIST evaluations prior to 2009 , the shortest reference sentence had been used instead .", + "entity_list": [ + { + "name": "NIST", + "type": "metrics" + } + ] + }, + { + "sentence": "On August 27 , 2018 , Toyota announced an investment of $ 500 Million in Uber ' s autonomous car s .", + "entity_list": [ + { + "name": "Toyota", + "type": "person" + }, + { + "name": "Uber", + "type": "organization" + }, + { + "name": "autonomous car", + "type": "product" + } + ] + }, + { + "sentence": "The sample maximum is the maximum likelihood estimator for the population maximum , but , as discussed above , it is biased .", + "entity_list": [ + { + "name": "maximum likelihood estimator", + "type": "metrics" + } + ] + }, + { + "sentence": "LSI helps overcome synonymy by increasing recall , one of the most problematic constraints of Boolean keyword queries and vector space models .", + "entity_list": [ + { + "name": "LSI", + "type": "task" + }, + { + "name": "synonymy", + "type": "else" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "Boolean keyword queries", + "type": "algorithm" + }, + { + "name": "vector space models", + "type": "algorithm" + } + ] + }, + { + "sentence": "Data acquisition applications are usually controlled by software programs developed using various general purpose programming languages such as Assembly , BASIC , C , C + + , C # , Fortran , Java , LabVIEW , Lisp , Pascal , etc .", + "entity_list": [ + { + "name": "Data acquisition", + "type": "task" + }, + { + "name": "Assembly", + "type": "program language" + }, + { + "name": "BASIC", + "type": "program language" + }, + { + "name": "C", + "type": "program language" + }, + { + "name": "C + +", + "type": "program language" + }, + { + "name": "C #", + "type": "program language" + }, + { + "name": "Fortran", + "type": "program language" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "LabVIEW", + "type": "program language" + }, + { + "name": "Lisp", + "type": "program language" + }, + { + "name": "Pascal", + "type": "program language" + } + ] + }, + { + "sentence": "In 2003 , Honda released its Cog advertisement in the UK and on the Internet .", + "entity_list": [ + { + "name": "Honda", + "type": "organization" + }, + { + "name": "Cog", + "type": "product" + }, + { + "name": "UK", + "type": "country" + } + ] + }, + { + "sentence": "The Association for Computational Linguistics defines computational linguistics as :", + "entity_list": [ + { + "name": "Association for Computational Linguistics", + "type": "conference" + }, + { + "name": "computational linguistics", + "type": "field" + } + ] + }, + { + "sentence": "Expectation-maximization algorithm s may be employed to calculate approximate maximum likelihood estimates of unknown state-space parameters within minimum-variance filters and smoothers .", + "entity_list": [ + { + "name": "Expectation-maximization algorithm", + "type": "algorithm" + }, + { + "name": "maximum likelihood estimates", + "type": "algorithm" + } + ] + }, + { + "sentence": "Correspondents included former Baywatch actresses Donna D 'Errico , Carmen Electra , and Traci Bingham , former Playboy Playmate Heidi Mark , comedian Arj Barker and identical twins Randy and Jason Sklar .", + "entity_list": [ + { + "name": "Baywatch", + "type": "else" + }, + { + "name": "Donna D 'Errico", + "type": "person" + }, + { + "name": "Carmen Electra", + "type": "person" + }, + { + "name": "Traci Bingham", + "type": "person" + }, + { + "name": "Playboy Playmate", + "type": "else" + }, + { + "name": "Heidi Mark", + "type": "person" + }, + { + "name": "Arj Barker", + "type": "person" + }, + { + "name": "Randy", + "type": "person" + }, + { + "name": "Jason Sklar", + "type": "person" + } + ] + }, + { + "sentence": "It is commonly used to generate representations for speech recognition ( ASR ) , e.g. the CMU Sphinx system , and speech synthesis ( TTS ) , e.g. the Festival system .", + "entity_list": [ + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "ASR", + "type": "task" + }, + { + "name": "CMU Sphinx system", + "type": "product" + }, + { + "name": "speech synthesis", + "type": "task" + }, + { + "name": "TTS", + "type": "task" + }, + { + "name": "Festival system", + "type": "product" + } + ] + }, + { + "sentence": "Sensitivity or TRUE Positive Rate ( TPR ) , also known as recall , is the proportion of people that tested positive and are positive ( TRUE Positive , TP ) of all the people that actually are positive ( Condition Positive , CP = TP + FN ) .", + "entity_list": [ + { + "name": "Sensitivity", + "type": "metrics" + }, + { + "name": "TRUE Positive Rate", + "type": "metrics" + }, + { + "name": "TPR", + "type": "metrics" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "TRUE Positive", + "type": "metrics" + }, + { + "name": "TP", + "type": "metrics" + }, + { + "name": "Condition Positive", + "type": "metrics" + }, + { + "name": "CP", + "type": "metrics" + }, + { + "name": "TP + FN", + "type": "metrics" + } + ] + }, + { + "sentence": "Popular speech recognition conferences held each year or two include SpeechTEK and SpeechTEK Europe , ICASSP , Interspeech / Eurospeech , and the IEEE ASRU .", + "entity_list": [ + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "SpeechTEK", + "type": "conference" + }, + { + "name": "SpeechTEK Europe", + "type": "conference" + }, + { + "name": "ICASSP", + "type": "conference" + }, + { + "name": "Interspeech", + "type": "conference" + }, + { + "name": "Eurospeech", + "type": "conference" + }, + { + "name": "IEEE ASRU", + "type": "conference" + } + ] + }, + { + "sentence": "Devol collaborated with Engelberger , who served as president of the company , to engineer and produce an industrial robot under the brand name Unimate .", + "entity_list": [ + { + "name": "Devol", + "type": "researcher" + }, + { + "name": "Engelberger", + "type": "researcher" + }, + { + "name": "industrial robot", + "type": "product" + }, + { + "name": "Unimate", + "type": "product" + } + ] + }, + { + "sentence": "A Hidden Markov model ( HMM ) is a statistical Markov model in which the system being modeled is assumed to be a Markov process with unobserved ( hidden ) states .", + "entity_list": [ + { + "name": "Hidden Markov model", + "type": "algorithm" + }, + { + "name": "HMM", + "type": "algorithm" + }, + { + "name": "statistical Markov model", + "type": "algorithm" + }, + { + "name": "Markov process", + "type": "algorithm" + } + ] + }, + { + "sentence": "This property , undesirable in many applications , has led researchers to use alternatives such as the mean absolute error , or those based on the median .", + "entity_list": [ + { + "name": "mean absolute error", + "type": "metrics" + }, + { + "name": "median", + "type": "else" + } + ] + }, + { + "sentence": "Such a sequence ( which depends on the outcome of the investigation of previous attributes at each stage ) is called a decision tree and applied in the area of machine learning known as decision tree learning .", + "entity_list": [ + { + "name": "decision tree", + "type": "algorithm" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "decision tree learning", + "type": "algorithm" + } + ] + }, + { + "sentence": "As in factor analysis , the LCA can also be used to classify case according to their maximum likelihood class membership .", + "entity_list": [ + { + "name": "factor analysis", + "type": "task" + }, + { + "name": "LCA", + "type": "algorithm" + }, + { + "name": "maximum likelihood", + "type": "algorithm" + } + ] + }, + { + "sentence": "Supervised neural networks that use a mean squared error ( MSE ) cost function can use formal statistical methods to determine the confidence of the trained model .", + "entity_list": [ + { + "name": "Supervised neural networks", + "type": "algorithm" + }, + { + "name": "mean squared error", + "type": "metrics" + }, + { + "name": "MSE", + "type": "metrics" + }, + { + "name": "cost function", + "type": "else" + } + ] + }, + { + "sentence": "This can be directly expressed as a linear program , but it is also equivalent to Tikhonov regularization with the hinge loss function , mathV ( f ( x ) , y ) = \\ max ( 0 , 1 - yf ( x ) ) / math :", + "entity_list": [ + { + "name": "Tikhonov regularization", + "type": "algorithm" + }, + { + "name": "hinge loss function", + "type": "metrics" + } + ] + }, + { + "sentence": "The following technique was described in Breiman 's original paper and is implemented in the R package randomForest .", + "entity_list": [ + { + "name": "Breiman", + "type": "researcher" + }, + { + "name": "R package randomForest", + "type": "product" + } + ] + }, + { + "sentence": "Traditional image quality measures , such as PSNR , are typically performed on fixed resolution images and do not take into account some aspects of the human visual system , like the change in spatial resolution across the retina .", + "entity_list": [ + { + "name": "PSNR", + "type": "metrics" + }, + { + "name": "retina", + "type": "else" + } + ] + }, + { + "sentence": "John Ireland , Joanne Dru and Macdonald Carey starred in the Jack Broder color production Hannah Lee , which premiered June 19 , 1953 .", + "entity_list": [ + { + "name": "John Ireland", + "type": "person" + }, + { + "name": "Joanne Dru", + "type": "person" + }, + { + "name": "Macdonald Carey", + "type": "person" + }, + { + "name": "Jack Broder", + "type": "person" + }, + { + "name": "Hannah Lee", + "type": "else" + } + ] + }, + { + "sentence": "That process is called image registration , and uses different methods of computer vision , mostly related to tracking .", + "entity_list": [ + { + "name": "image registration", + "type": "task" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "tracking", + "type": "task" + } + ] + }, + { + "sentence": "Now let us start explaining the different possible relations between predicted and actual outcome : Confusion matrix", + "entity_list": [] + }, + { + "sentence": "The VOICEBOX speech processing toolbox for MATLAB implements the conversion and its inverse as :", + "entity_list": [ + { + "name": "VOICEBOX", + "type": "product" + }, + { + "name": "speech processing toolbox", + "type": "else" + }, + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "Prolog is a logic programming language associated with artificial intelligence and computational linguistics .", + "entity_list": [ + { + "name": "Prolog", + "type": "program language" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "computational linguistics", + "type": "field" + } + ] + }, + { + "sentence": "Milner has received numerous awards for her contributions to neuroscience and psychology including memberships in the Royal Society of London , the Royal Society of Canada and the National Academy of Sciences .", + "entity_list": [ + { + "name": "Milner", + "type": "researcher" + }, + { + "name": "neuroscience", + "type": "field" + }, + { + "name": "psychology", + "type": "field" + }, + { + "name": "Royal Society of London", + "type": "organization" + }, + { + "name": "Royal Society of Canada", + "type": "organization" + }, + { + "name": "National Academy of Sciences", + "type": "organization" + } + ] + }, + { + "sentence": "By combining these operators one can obtain algorithms for many image processing tasks , such as feature extraction , image segmentation , image sharpening , image filtering , and classification .", + "entity_list": [ + { + "name": "image processing", + "type": "field" + }, + { + "name": "feature extraction", + "type": "task" + }, + { + "name": "image segmentation", + "type": "task" + }, + { + "name": "image sharpening", + "type": "task" + }, + { + "name": "image filtering", + "type": "task" + }, + { + "name": "classification", + "type": "task" + } + ] + }, + { + "sentence": "As of 2017 , he is a professor at the Collège de France and , since 1989 , the director of INSERM Unit 562 , Cognitive Neuroimaging .", + "entity_list": [ + { + "name": "Collège de France", + "type": "university" + }, + { + "name": "INSERM Unit 562", + "type": "organization" + }, + { + "name": "Cognitive Neuroimaging", + "type": "field" + } + ] + }, + { + "sentence": "There are many approaches to learning these embeddings , notably using Bayesian clustering frameworks or energy-based frameworks , and more recently , TransE ( Conference on Neural Information Processing Systems 2013 ) .", + "entity_list": [ + { + "name": "Bayesian clustering frameworks", + "type": "algorithm" + }, + { + "name": "energy-based frameworks", + "type": "algorithm" + }, + { + "name": "TransE", + "type": "conference" + }, + { + "name": "Conference on Neural Information Processing Systems 2013", + "type": "conference" + } + ] + }, + { + "sentence": "It is an alternative to the Word error rate ( Word Error Rate ) used in several countries .", + "entity_list": [ + { + "name": "Word error rate", + "type": "metrics" + }, + { + "name": "Word Error Rate", + "type": "metrics" + } + ] + }, + { + "sentence": "ANNs have been used on a variety of tasks , including computer vision , speech recognition , machine translation , social network filtering , playing board and video games , medical diagnosis , and even in activities that have traditionally been considered as reserved to humans , like painting .", + "entity_list": [ + { + "name": "ANNs", + "type": "algorithm" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "machine translation", + "type": "task" + }, + { + "name": "social network filtering", + "type": "task" + }, + { + "name": "playing board and video games", + "type": "task" + }, + { + "name": "medical diagnosis", + "type": "task" + }, + { + "name": "painting", + "type": "task" + } + ] + }, + { + "sentence": "Modular Audio Recognition Framework ( MARF ) is an open-source research platform and a collection of voice , sound , speech , text and natural language processing ( NLP ) algorithm s written in Java and arranged into a modular and extensible framework that attempts to facilitate addition of new algorithm s .", + "entity_list": [ + { + "name": "Modular Audio Recognition Framework", + "type": "product" + }, + { + "name": "MARF", + "type": "product" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "NLP", + "type": "field" + }, + { + "name": "Java", + "type": "program language" + } + ] + }, + { + "sentence": "In 2018 , a report by the civil liberties and rights campaigning organisation Big Brother Watch revealed that two United Kingdom police forces , South Wales Police and the Metropolitan Police , were using live facial recognition at public events and in public spaces , in September 2019 , South Wales Police use of facial recognition was ruled lawful .", + "entity_list": [ + { + "name": "Big Brother Watch", + "type": "organization" + }, + { + "name": "United Kingdom", + "type": "country" + }, + { + "name": "South Wales Police", + "type": "organization" + }, + { + "name": "Metropolitan Police", + "type": "organization" + }, + { + "name": "facial recognition", + "type": "task" + } + ] + }, + { + "sentence": "ANIMAL has been ported to R , a freely available language and environment for statistical computing and graphics .", + "entity_list": [ + { + "name": "ANIMAL", + "type": "product" + }, + { + "name": "R", + "type": "program language" + }, + { + "name": "statistical computing", + "type": "field" + }, + { + "name": "graphics", + "type": "field" + } + ] + }, + { + "sentence": "Time-inhomogeneous hidden Bernoulli model ( TI-HBM ) is an alternative to hidden Markov model ( HMM ) for automatic speech recognition .", + "entity_list": [ + { + "name": "Time-inhomogeneous hidden Bernoulli model", + "type": "algorithm" + }, + { + "name": "TI-HBM", + "type": "algorithm" + }, + { + "name": "hidden Markov model", + "type": "algorithm" + }, + { + "name": "HMM", + "type": "algorithm" + }, + { + "name": "automatic speech recognition", + "type": "task" + } + ] + }, + { + "sentence": "In July 2016 , Nvidia demonstrated during SIGGRAPH a new method of foveated rendering claimed to be invisible to users .", + "entity_list": [ + { + "name": "Nvidia", + "type": "organization" + }, + { + "name": "SIGGRAPH", + "type": "conference" + } + ] + }, + { + "sentence": "Both rely on speech act theory developed by John Searle in the 1960s and enhanced by Terry Winograd and Flores in the 1970s .", + "entity_list": [ + { + "name": "speech act theory", + "type": "else" + }, + { + "name": "John Searle", + "type": "researcher" + }, + { + "name": "Terry Winograd", + "type": "researcher" + }, + { + "name": "Flores", + "type": "researcher" + } + ] + }, + { + "sentence": "Neural network models of concept formation and the structure of knowledge have opened powerful hierarchical models of knowledge organization such as George Miller ' s Wordnet .", + "entity_list": [ + { + "name": "Neural network models", + "type": "algorithm" + }, + { + "name": "George Miller", + "type": "researcher" + }, + { + "name": "Wordnet", + "type": "product" + } + ] + }, + { + "sentence": "Template matching has various applications and is used in such fields as face recognition ( see facial recognition system ) and medical image processing .", + "entity_list": [ + { + "name": "Template matching", + "type": "algorithm" + }, + { + "name": "face recognition", + "type": "task" + }, + { + "name": "facial recognition system", + "type": "product" + }, + { + "name": "medical image processing", + "type": "task" + } + ] + }, + { + "sentence": "However , usage only became widespread in 2005 when Navneet Dalal and Bill Triggs , researchers for the French National Institute for Research in Computer Science and Automation ( INRIA ) , presented their supplementary work on HOG descriptors at the Conference on Computer Vision and Pattern Recognition ( CVPR ) .", + "entity_list": [ + { + "name": "Navneet Dalal", + "type": "researcher" + }, + { + "name": "Bill Triggs", + "type": "researcher" + }, + { + "name": "French National Institute for Research in Computer Science and Automation", + "type": "organization" + }, + { + "name": "INRIA", + "type": "organization" + }, + { + "name": "HOG descriptors", + "type": "algorithm" + }, + { + "name": "Conference on Computer Vision and Pattern Recognition", + "type": "conference" + }, + { + "name": "CVPR", + "type": "conference" + } + ] + }, + { + "sentence": "Prior to joining the Penn faculty in 2002 , he spent a decade ( 1991-2001 ) in AT & T Labs and Bell Labs , including as head of the AI department with colleagues including Michael L. Littman , David A. McAllester , and Richard S. Sutton ; Secure Systems Research department ; and Machine Learning department with members such as Michael Collins and the leader ) .", + "entity_list": [ + { + "name": "Penn", + "type": "university" + }, + { + "name": "AT & T Labs", + "type": "organization" + }, + { + "name": "Bell Labs", + "type": "organization" + }, + { + "name": "AI", + "type": "field" + }, + { + "name": "Michael L. Littman", + "type": "researcher" + }, + { + "name": "David A. McAllester", + "type": "researcher" + }, + { + "name": "Richard S. Sutton", + "type": "researcher" + }, + { + "name": "Secure Systems Research department", + "type": "organization" + }, + { + "name": "Machine Learning", + "type": "field" + }, + { + "name": "Michael Collins", + "type": "researcher" + } + ] + }, + { + "sentence": "When data are unlabelled , supervised learning is not possible , and an unsupervised learning approach is required which attempts to find natural Cluster analysis to groups , and then map new data to these formed groups .", + "entity_list": [ + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "unsupervised learning", + "type": "field" + }, + { + "name": "Cluster analysis", + "type": "task" + } + ] + }, + { + "sentence": "This field of computer science developed in the 1950s at academic institutions such as the MIT A.I. Lab , originally as a branch of artificial intelligence and robotics .", + "entity_list": [ + { + "name": "computer science", + "type": "field" + }, + { + "name": "MIT A.I. Lab", + "type": "organization" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "robotics", + "type": "field" + } + ] + }, + { + "sentence": "It could also be replaced by the Log loss equation below :", + "entity_list": [ + { + "name": "Log loss", + "type": "metrics" + } + ] + }, + { + "sentence": "The Shirley Ryan AbilityLab ( formerly the Rehabilitation Institute of Chicago ) , University of California at Berkeley , MIT , Stanford University , and University of Twente in the Netherlands are the researching leaders in biomechatronics .", + "entity_list": [ + { + "name": "Shirley Ryan AbilityLab", + "type": "organization" + }, + { + "name": "Rehabilitation Institute of Chicago", + "type": "organization" + }, + { + "name": "University of California at Berkeley", + "type": "university" + }, + { + "name": "MIT", + "type": "university" + }, + { + "name": "Stanford University", + "type": "university" + }, + { + "name": "University of Twente", + "type": "university" + }, + { + "name": "Netherlands", + "type": "country" + }, + { + "name": "biomechatronics", + "type": "field" + } + ] + }, + { + "sentence": "Given a set of predicted values and a corresponding set of actual values for X for various time periods , a common evaluation technique is to use the mean squared prediction error ; other measures are also available ( see forecasting # forecasting accuracy ) .", + "entity_list": [ + { + "name": "mean squared prediction error", + "type": "metrics" + }, + { + "name": "forecasting accuracy", + "type": "metrics" + } + ] + }, + { + "sentence": "Other measures , such as the proportion of correct predictions ( also termed accuracy ) , are not useful when the two classes are of very different sizes .", + "entity_list": [ + { + "name": "accuracy", + "type": "metrics" + } + ] + }, + { + "sentence": "The first alpha version of OpenCV was released to the public at the Conference on Computer Vision and Pattern Recognition in 2000 , and five betas were released between 2001 and 2005 .", + "entity_list": [ + { + "name": "OpenCV", + "type": "product" + }, + { + "name": "Computer Vision and Pattern Recognition", + "type": "conference" + } + ] + }, + { + "sentence": "Results have been presented which give correlation of up to 0.964 with human judgement at the corpus level , compared to BLEU ' s achievement of 0.817 on the same data set .", + "entity_list": [ + { + "name": "BLEU", + "type": "metrics" + } + ] + }, + { + "sentence": "An early version of VMAF has been shown to outperform other image and video quality metrics such as SSIM , PSNR -HVS and VQM-VFD on three of four datasets in terms of prediction accuracy , when compared to subjective ratings .", + "entity_list": [ + { + "name": "VMAF", + "type": "metrics" + }, + { + "name": "SSIM", + "type": "metrics" + }, + { + "name": "PSNR -HVS", + "type": "metrics" + }, + { + "name": "VQM-VFD", + "type": "metrics" + }, + { + "name": "accuracy", + "type": "metrics" + } + ] + }, + { + "sentence": "For example , the ambiguity of ' mouse ' ( animal or device ) is not relevant in machine translation , but is relevant in information retrieval .", + "entity_list": [ + { + "name": "machine translation", + "type": "task" + }, + { + "name": "information retrieval", + "type": "task" + } + ] + }, + { + "sentence": "Geometric hashing was originally suggested in computer vision for object recognition in 2D and 3D ,", + "entity_list": [ + { + "name": "Geometric hashing", + "type": "algorithm" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "object recognition", + "type": "task" + } + ] + }, + { + "sentence": "It forms one of the three main categories of machine learning , along with supervised learning and reinforcement learning .", + "entity_list": [ + { + "name": "machine learning", + "type": "field" + }, + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "reinforcement learning", + "type": "field" + } + ] + }, + { + "sentence": "Reinforcement learning , due to its generality , is studied in many other disciplines , such as game , control theory , operations research , information theory , simulation-based optimization , multi-agent systems , swarm intelligence , statistics and genetic algorithm s .", + "entity_list": [ + { + "name": "Reinforcement learning", + "type": "field" + }, + { + "name": "game", + "type": "field" + }, + { + "name": "control theory", + "type": "field" + }, + { + "name": "operations research", + "type": "field" + }, + { + "name": "information theory", + "type": "field" + }, + { + "name": "simulation-based optimization", + "type": "field" + }, + { + "name": "multi-agent systems", + "type": "field" + }, + { + "name": "swarm intelligence", + "type": "field" + }, + { + "name": "statistics", + "type": "field" + }, + { + "name": "genetic algorithm", + "type": "algorithm" + } + ] + }, + { + "sentence": "Pattern recognition is closely related to artificial intelligence and machine learning ,", + "entity_list": [ + { + "name": "Pattern recognition", + "type": "field" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + } + ] + }, + { + "sentence": "The software is used to design , train and deploy neural network ( supervised learning and unsupervised learning ) models to perform a wide variety of tasks such as data mining , classification , function approximation , multivariate regression and time-series prediction .", + "entity_list": [ + { + "name": "neural network", + "type": "algorithm" + }, + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "unsupervised learning", + "type": "field" + }, + { + "name": "data mining", + "type": "field" + }, + { + "name": "classification", + "type": "task" + }, + { + "name": "function approximation", + "type": "task" + }, + { + "name": "multivariate regression", + "type": "algorithm" + }, + { + "name": "time-series prediction", + "type": "task" + } + ] + }, + { + "sentence": "In 2016 , he was elected Fellow of Association for the Advancement of Artificial Intelligence .", + "entity_list": [ + { + "name": "Association for the Advancement of Artificial Intelligence", + "type": "conference" + } + ] + }, + { + "sentence": "She serves as a member of the National Academy of Sciences ( since 2005 ) , American Academy of Arts and Sciences ( since 2009 ) ,", + "entity_list": [ + { + "name": "National Academy of Sciences", + "type": "organization" + }, + { + "name": "American Academy of Arts and Sciences", + "type": "organization" + } + ] + }, + { + "sentence": "During the 1973 Yom Kippur War , Soviet-supplied surface-to-air missile batteries in Egypt and Syria caused heavy damage Israeli fighter jet s .", + "entity_list": [ + { + "name": "Yom Kippur War", + "type": "else" + }, + { + "name": "surface-to-air missile", + "type": "product" + }, + { + "name": "Egypt", + "type": "country" + }, + { + "name": "Syria", + "type": "country" + }, + { + "name": "Israeli", + "type": "else" + } + ] + }, + { + "sentence": "Another resource ( free but copyrighted ) is the HTK book ( and the accompanying HTK toolkit ) .", + "entity_list": [ + { + "name": "HTK book", + "type": "product" + }, + { + "name": "HTK toolkit", + "type": "product" + } + ] + }, + { + "sentence": "- were taken in the 2004 AAAI Spring Symposium where linguists , computer scientists , and other interested researchers first aligned interests and proposed shared tasks and benchmark data sets for the systematic computational research on affect , appeal , subjectivity , and sentiment in text .", + "entity_list": [ + { + "name": "2004 AAAI", + "type": "conference" + } + ] + }, + { + "sentence": "A single grid can be analysed for both content ( eyeball inspection ) and structure ( cluster analysis , principal component analysis , and a variety of structural indices relating to the complexity and range of the ratings being the chief techniques used ) .", + "entity_list": [ + { + "name": "eyeball inspection", + "type": "task" + }, + { + "name": "cluster analysis", + "type": "task" + }, + { + "name": "principal component analysis", + "type": "task" + } + ] + }, + { + "sentence": "In 2018 Toyota was regarded as being behind in Self-driving car and in need of innovation .", + "entity_list": [ + { + "name": "Toyota", + "type": "organization" + }, + { + "name": "Self-driving car", + "type": "product" + } + ] + }, + { + "sentence": "Such targets include natural objects such as ground , sea , precipitation ( such as rain , snow or hail ) , sand storm s , animals ( especially birds ) , atmospheric turbulence , and other atmospheric effects , such as ionosphere reflections , meteor trails , and three body scatter spike .", + "entity_list": [ + { + "name": "ionosphere reflections", + "type": "else" + }, + { + "name": "meteor trails", + "type": "else" + }, + { + "name": "three body scatter spike", + "type": "else" + } + ] + }, + { + "sentence": "In planning and control , the essential difference between humanoids and other kinds of robots ( like industrial ones ) is that the movement of the robot must be human-like , using legged locomotion , especially biped gait .", + "entity_list": [ + { + "name": "industrial", + "type": "product" + }, + { + "name": "biped gait", + "type": "else" + } + ] + }, + { + "sentence": "The gradient descent can take many iterations to compute a local minimum with a required accuracy , if the curvature in different directions is very different for the given function .", + "entity_list": [ + { + "name": "gradient descent", + "type": "algorithm" + }, + { + "name": "local minimum", + "type": "else" + }, + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "curvature", + "type": "else" + } + ] + }, + { + "sentence": "The 1997 RoboCup 2D Soccer Simulation League was the first RoboCup competition promoted in conjunction with International Joint Conference on Artificial Intelligence held in Nagoya , Japan , from 23 to 29 August 1997 .", + "entity_list": [ + { + "name": "1997 RoboCup 2D Soccer Simulation League", + "type": "else" + }, + { + "name": "RoboCup", + "type": "else" + }, + { + "name": "International Joint Conference on Artificial Intelligence", + "type": "conference" + }, + { + "name": "Nagoya", + "type": "location" + }, + { + "name": "Japan", + "type": "country" + } + ] + }, + { + "sentence": "Other programming options include an embedded Python environment , and an R Console plus support for Rserve .", + "entity_list": [ + { + "name": "Python", + "type": "program language" + }, + { + "name": "R", + "type": "program language" + }, + { + "name": "Rserve", + "type": "product" + } + ] + }, + { + "sentence": "From Bonn he has contributed fundamentally to artificial intelligence and robotics ( with Wolfram Burgard , Dieter Fox , Sebastian Thrun among his students ) , and to the development of software engineering , particularly in civil engineering , and information systems , particularly in the geosciences. won the AAAI Classic Paper award of 2016.2014 .", + "entity_list": [ + { + "name": "Bonn", + "type": "researcher" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "robotics", + "type": "field" + }, + { + "name": "Wolfram Burgard", + "type": "researcher" + }, + { + "name": "Dieter Fox", + "type": "researcher" + }, + { + "name": "Sebastian Thrun", + "type": "researcher" + }, + { + "name": "software engineering", + "type": "field" + }, + { + "name": "civil engineering", + "type": "field" + }, + { + "name": "information systems", + "type": "field" + }, + { + "name": "geosciences.", + "type": "field" + }, + { + "name": "AAAI Classic Paper award", + "type": "else" + } + ] + }, + { + "sentence": "The first USA edition of Campus Party will take place from 20 to 22 of August at TCF Center in Detroit , Michigan .", + "entity_list": [ + { + "name": "USA edition of Campus Party", + "type": "conference" + }, + { + "name": "TCF Center", + "type": "location" + }, + { + "name": "Detroit", + "type": "location" + }, + { + "name": "Michigan", + "type": "location" + } + ] + }, + { + "sentence": "Together with Yann LeCun , and Yoshua Bengio , Hinton won the 2018 Turing Award for conceptual and engineering breakthroughs that have made deep neural networks a critical component of computing .", + "entity_list": [ + { + "name": "Yann LeCun", + "type": "researcher" + }, + { + "name": "Yoshua Bengio", + "type": "researcher" + }, + { + "name": "Hinton", + "type": "researcher" + }, + { + "name": "Turing Award", + "type": "else" + }, + { + "name": "deep neural networks", + "type": "algorithm" + } + ] + }, + { + "sentence": "Euler Math Toolbox uses a matrix language similar to MATLAB , a system that had been under development since the 1970s .", + "entity_list": [ + { + "name": "Euler Math Toolbox", + "type": "product" + }, + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "Some languages make it possible portably ( e.g. Scheme , Common Lisp , Perl or D ) .", + "entity_list": [ + { + "name": "Scheme", + "type": "program language" + }, + { + "name": "Common Lisp", + "type": "program language" + }, + { + "name": "Perl", + "type": "program language" + }, + { + "name": "D", + "type": "program language" + } + ] + }, + { + "sentence": "In 1969 a famous book entitled Perceptrons by Marvin Minsky and Seymour Papert showed that it was impossible for these classes of network to learn an XOR function .", + "entity_list": [ + { + "name": "Perceptrons", + "type": "else" + }, + { + "name": "Marvin Minsky", + "type": "researcher" + }, + { + "name": "Seymour Papert", + "type": "researcher" + }, + { + "name": "XOR function", + "type": "else" + } + ] + }, + { + "sentence": "Large numbers of Russian scientific and technical documents were translated using SYSTRAN under the auspices of the USAF Foreign Technology Division ( later the National Air and Space Intelligence Center ) at Wright-Patterson Air Force Base , Ohio .", + "entity_list": [ + { + "name": "Russian", + "type": "else" + }, + { + "name": "SYSTRAN", + "type": "product" + }, + { + "name": "USAF Foreign Technology Division", + "type": "organization" + }, + { + "name": "National Air and Space Intelligence Center", + "type": "organization" + }, + { + "name": "Wright-Patterson Air Force Base", + "type": "location" + }, + { + "name": "Ohio", + "type": "location" + } + ] + }, + { + "sentence": "Semi-supervised learning falls between unsupervised learning ( without any labeled training data ) and supervised learning ( with completely labeled training data ) .", + "entity_list": [ + { + "name": "Semi-supervised learning", + "type": "field" + }, + { + "name": "unsupervised learning", + "type": "field" + }, + { + "name": "supervised learning", + "type": "field" + } + ] + }, + { + "sentence": "An n -gram model is a type of probabilistic language model for predicting the next item in such a sequence in the form of a ( n − 1 ) -order Markov model .efficiently .", + "entity_list": [ + { + "name": "n -gram model", + "type": "algorithm" + }, + { + "name": "probabilistic language model", + "type": "algorithm" + }, + { + "name": "Markov model", + "type": "algorithm" + } + ] + }, + { + "sentence": "The Cleveland Clinic has used Cyc to develop a natural language query interface of biomedical information , spanning decades of information on cardiothoracic surgeries .", + "entity_list": [ + { + "name": "Cleveland Clinic", + "type": "location" + }, + { + "name": "Cyc", + "type": "product" + }, + { + "name": "natural language query interface of biomedical information", + "type": "product" + } + ] + }, + { + "sentence": "The incident strained relations between the United States and Japan , and resulted in the arrest and prosecution two senior executives , as well as the imposition of sanctions on the company by both countries .", + "entity_list": [ + { + "name": "United States", + "type": "country" + }, + { + "name": "Japan", + "type": "country" + } + ] + }, + { + "sentence": "If the modeling is done by an artificial neural network or other machine learning , the optimization of parameters is called training , while the optimization of model hyperparameters is called tuning and often uses cross-validation ..", + "entity_list": [ + { + "name": "artificial neural network", + "type": "algorithm" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "training", + "type": "else" + }, + { + "name": "tuning", + "type": "else" + }, + { + "name": "cross-validation", + "type": "algorithm" + } + ] + }, + { + "sentence": "Localized versions of the site available in the United Kingdom , India , and Australia were discontinued following the acquisition of Rotten Tomatoes by Fandango .", + "entity_list": [ + { + "name": "United Kingdom", + "type": "country" + }, + { + "name": "India", + "type": "country" + }, + { + "name": "Australia", + "type": "country" + }, + { + "name": "Rotten Tomatoes", + "type": "organization" + }, + { + "name": "Fandango", + "type": "organization" + } + ] + }, + { + "sentence": "The NER model is one of a number of methods for determining the accuracy of live subtitles in television broadcasts and events that are produced using speech recognition .", + "entity_list": [ + { + "name": "NER", + "type": "task" + }, + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "speech recognition", + "type": "task" + } + ] + }, + { + "sentence": "Atran has taught at Cambridge University , Hebrew University in Jerusalem , the École pratique des hautes études and École Polytechnique in Paris , and John Jay College of Criminal Justice in New York City .", + "entity_list": [ + { + "name": "Atran", + "type": "researcher" + }, + { + "name": "Cambridge University", + "type": "university" + }, + { + "name": "Hebrew University", + "type": "university" + }, + { + "name": "Jerusalem", + "type": "location" + }, + { + "name": "École pratique des hautes études", + "type": "university" + }, + { + "name": "École Polytechnique", + "type": "university" + }, + { + "name": "Paris", + "type": "location" + }, + { + "name": "John Jay College of Criminal Justice", + "type": "university" + }, + { + "name": "New York City", + "type": "location" + } + ] + }, + { + "sentence": "SHRDLU was an early natural language understanding computer program , developed by Terry Winograd at MIT in 1968-1970", + "entity_list": [ + { + "name": "SHRDLU", + "type": "product" + }, + { + "name": "natural language understanding", + "type": "task" + }, + { + "name": "Terry Winograd", + "type": "researcher" + }, + { + "name": "MIT", + "type": "university" + } + ] + }, + { + "sentence": "He received a B.E. in electronics engineering from B.M.S. College of Engineering in Bangalore , India in 1982 , when it was affiliated with Bangalore University , an M.S. in electrical and computer engineering in 1984 from Drexel University , and an M.S. in computer science in 1989 , and a Ph.D. in 1990 , respectively , from the University of Wisconsin-Madison , where he studied Artificial Intelligence and worked with Leonard Uhr .", + "entity_list": [ + { + "name": "B.E.", + "type": "else" + }, + { + "name": "electronics engineering", + "type": "field" + }, + { + "name": "B.M.S. College of Engineering", + "type": "university" + }, + { + "name": "Bangalore", + "type": "location" + }, + { + "name": "India", + "type": "country" + }, + { + "name": "Bangalore University", + "type": "university" + }, + { + "name": "M.S.", + "type": "else" + }, + { + "name": "electrical and computer engineering", + "type": "field" + }, + { + "name": "Drexel University", + "type": "university" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "Ph.D.", + "type": "else" + }, + { + "name": "University of Wisconsin-Madison", + "type": "university" + }, + { + "name": "Artificial Intelligence", + "type": "field" + }, + { + "name": "Leonard Uhr", + "type": "researcher" + } + ] + }, + { + "sentence": "Accuracy is usually rated with word error rate ( WER ) , whereas speed is measured with the real time factor .", + "entity_list": [ + { + "name": "word error rate", + "type": "metrics" + }, + { + "name": "WER", + "type": "metrics" + }, + { + "name": "real time factor", + "type": "metrics" + } + ] + }, + { + "sentence": "In 1971 Terry Winograd developed an early natural language processing engine capable of interpreting naturally written commands within a simple rule-governed environment .", + "entity_list": [ + { + "name": "Terry Winograd", + "type": "researcher" + }, + { + "name": "natural language processing", + "type": "field" + } + ] + }, + { + "sentence": "In artificial intelligence , Marvin Minsky , Herbert A. Simon , and Allen Newell are prominent .", + "entity_list": [ + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "Marvin Minsky", + "type": "researcher" + }, + { + "name": "Herbert A. Simon", + "type": "researcher" + }, + { + "name": "Allen Newell", + "type": "researcher" + } + ] + }, + { + "sentence": "In the latter half of the 20th century , electrical engineering itself separated into several disciplines , specialising in the design and analysis of systems that manipulate physical signals ; electronic engineering and computer engineering as examples ; while design engineering developed to deal with functional design of user-machine interfaces .", + "entity_list": [ + { + "name": "electrical engineering", + "type": "field" + }, + { + "name": "electronic engineering", + "type": "field" + }, + { + "name": "computer engineering", + "type": "field" + }, + { + "name": "design engineering", + "type": "field" + }, + { + "name": "user-machine interfaces", + "type": "else" + } + ] + }, + { + "sentence": "Perhaps the simplest statistic is accuracy or Fraction Correct ( FC ) , which measures the fraction of all instances that are correctly categorized ; it is the ratio of the number of correct classifications to the total number of correct or incorrect classifications : ( TP + TN ) / Total Population = ( TP + TN ) / ( TP + TN + FP + FN ) .", + "entity_list": [ + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "Fraction Correct", + "type": "metrics" + }, + { + "name": "FC", + "type": "metrics" + }, + { + "name": "TP + TN", + "type": "metrics" + }, + { + "name": "TP + TN + FP + FN", + "type": "metrics" + } + ] + }, + { + "sentence": "In the academic community , the major forums for research started in 1995 when the First International Conference Data Mining and Knowledge Discovery ( KDD-95 ) was started in Montreal under AAAI sponsorship .", + "entity_list": [ + { + "name": "First International Conference Data Mining and Knowledge Discovery", + "type": "conference" + }, + { + "name": "KDD-95", + "type": "conference" + }, + { + "name": "Montreal", + "type": "location" + }, + { + "name": "AAAI", + "type": "conference" + } + ] + }, + { + "sentence": "In this approach , models are developed using different data mining , machine learning algorithms to predict users ' rating of unrated items .", + "entity_list": [ + { + "name": "data mining", + "type": "field" + }, + { + "name": "machine learning", + "type": "field" + } + ] + }, + { + "sentence": "In light of the above discussion , we see that the SVM technique is equivalent to empirical risk with Tikhonov regularization , where in this case the loss function is the hinge loss", + "entity_list": [ + { + "name": "SVM", + "type": "algorithm" + }, + { + "name": "empirical risk", + "type": "algorithm" + }, + { + "name": "Tikhonov regularization", + "type": "algorithm" + }, + { + "name": "loss function", + "type": "else" + } + ] + }, + { + "sentence": "The 2015 edition was hosted by Molly McGrath , with Chris Rose and former UFC fighter Kenny Florian as commentators .", + "entity_list": [ + { + "name": "Molly McGrath", + "type": "person" + }, + { + "name": "Chris Rose", + "type": "person" + }, + { + "name": "UFC", + "type": "organization" + }, + { + "name": "Kenny Florian", + "type": "person" + } + ] + }, + { + "sentence": "A subset called Micro-Planner was implemented by Gerald Jay Sussman , Eugene Charniak and Terry Winograd Sussman , , and Winograd 1971 and was used in Winograd 's natural-language understanding program SHRDLU , Eugene Charniak 's story understanding work , Thorne McCarty 's work on legal reasoning , and some other projects .", + "entity_list": [ + { + "name": "Micro-Planner", + "type": "product" + }, + { + "name": "Gerald Jay Sussman", + "type": "researcher" + }, + { + "name": "Eugene Charniak", + "type": "researcher" + }, + { + "name": "Terry Winograd", + "type": "researcher" + }, + { + "name": "Sussman", + "type": "researcher" + }, + { + "name": "Winograd", + "type": "researcher" + }, + { + "name": "natural-language understanding", + "type": "task" + }, + { + "name": "SHRDLU", + "type": "product" + }, + { + "name": "story understanding", + "type": "task" + }, + { + "name": "Thorne McCarty", + "type": "researcher" + }, + { + "name": "legal reasoning", + "type": "task" + } + ] + }, + { + "sentence": "WordNet has been used for a number of purposes in information systems , including word-sense disambiguation , information retrieval , automatic text classification , Automatic summarization , machine translation and even automatic crossword puzzle generation .", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + }, + { + "name": "information systems", + "type": "product" + }, + { + "name": "word-sense disambiguation", + "type": "task" + }, + { + "name": "information retrieval", + "type": "task" + }, + { + "name": "automatic text classification", + "type": "task" + }, + { + "name": "Automatic summarization", + "type": "task" + }, + { + "name": "machine translation", + "type": "task" + }, + { + "name": "automatic crossword puzzle generation", + "type": "task" + } + ] + }, + { + "sentence": "Keutzer was named a Fellow of the IEEE in 1996 .", + "entity_list": [ + { + "name": "Keutzer", + "type": "researcher" + }, + { + "name": "IEEE", + "type": "organization" + } + ] + }, + { + "sentence": "A widely used type of composition is the nonlinear weighted sum , where math \\ textstyle f ( x ) = K \\ left ( \\ sum _ i w _ i g _ i ( x ) \\ right ) / math , where math \\ textstyle K / math ( commonly referred to as the activation function ) is some predefined function , such as the hyperbolic tangent , sigmoid function , softmax function , or rectifier function .", + "entity_list": [ + { + "name": "nonlinear weighted sum", + "type": "algorithm" + }, + { + "name": "activation function", + "type": "else" + }, + { + "name": "hyperbolic tangent", + "type": "algorithm" + }, + { + "name": "sigmoid function", + "type": "algorithm" + }, + { + "name": "softmax function", + "type": "algorithm" + }, + { + "name": "rectifier function", + "type": "algorithm" + } + ] + }, + { + "sentence": "In the film Westworld , female robots actually engaged in intercourse with human men as part of the make-believe vacation world human customers paid to attend .", + "entity_list": [ + { + "name": "Westworld", + "type": "else" + } + ] + }, + { + "sentence": "Typically , the process starts by terminology extraction and concepts or noun phrase s from plain text using linguistic processors such as part-of-speech tagging and phrase chunking .", + "entity_list": [ + { + "name": "terminology extraction", + "type": "task" + }, + { + "name": "part-of-speech tagging", + "type": "task" + }, + { + "name": "phrase chunking", + "type": "task" + } + ] + }, + { + "sentence": "They demonstrated its performance on a number of problems of interest to the machine learning community , including handwriting recognition .", + "entity_list": [ + { + "name": "machine learning", + "type": "field" + }, + { + "name": "handwriting recognition", + "type": "task" + } + ] + }, + { + "sentence": "While studying at Stanford , Scheinman was awarded a fellowship sponsored by George Devol , the inventor of the Unimate , the first industrial robot .", + "entity_list": [ + { + "name": "Stanford", + "type": "university" + }, + { + "name": "Scheinman", + "type": "researcher" + }, + { + "name": "George Devol", + "type": "researcher" + }, + { + "name": "Unimate", + "type": "product" + }, + { + "name": "industrial robot", + "type": "product" + } + ] + }, + { + "sentence": "While originally used to evaluate machine translations , bilingual evaluation understudy ( BLEU ) has been used successfully to evaluate paraphrase generation models as well .", + "entity_list": [ + { + "name": "machine translations", + "type": "task" + }, + { + "name": "bilingual evaluation understudy", + "type": "metrics" + }, + { + "name": "BLEU", + "type": "metrics" + }, + { + "name": "paraphrase generation models", + "type": "product" + } + ] + }, + { + "sentence": "Unimation later licensed their technology to Kawasaki Heavy Industries and GKN , manufacturing Unimate s in Japan and England respectively .", + "entity_list": [ + { + "name": "Unimation", + "type": "organization" + }, + { + "name": "Kawasaki Heavy Industries", + "type": "organization" + }, + { + "name": "GKN", + "type": "organization" + }, + { + "name": "Unimate", + "type": "product" + }, + { + "name": "Japan", + "type": "country" + }, + { + "name": "England", + "type": "country" + } + ] + }, + { + "sentence": "Much of the confusion between these two research communities ( which do often have separate conferences and separate journals , ECML PKDD being a major exception ) comes from the basic assumptions they work with : in machine learning , performance is usually evaluated with respect to the ability to reproduce known knowledge , while in knowledge discovery and data mining ( KDD ) the key task is the discovery of previously unknown knowledge .", + "entity_list": [ + { + "name": "ECML PKDD", + "type": "conference" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "knowledge discovery and data mining", + "type": "conference" + }, + { + "name": "KDD", + "type": "conference" + } + ] + }, + { + "sentence": "Hidden Markov model s are the basis for most modern automatic speech recognition systems .", + "entity_list": [ + { + "name": "Hidden Markov model", + "type": "algorithm" + }, + { + "name": "automatic speech recognition systems", + "type": "product" + } + ] + }, + { + "sentence": ", a company in Bangalore , India specializing in online handwriting recognition software .", + "entity_list": [ + { + "name": "Bangalore", + "type": "location" + }, + { + "name": "India", + "type": "country" + }, + { + "name": "handwriting recognition", + "type": "task" + } + ] + }, + { + "sentence": "Do repeated translations converge on a single expression in both languages ? I.e. does the translation method show stationarity or produce a canonical form ? Does the translation become stationary without losing the original meaning ? This metric has been criticized as not being well correlated with BLEU ( BiLingual Evaluation Understudy ) scores .", + "entity_list": [ + { + "name": "canonical form", + "type": "else" + }, + { + "name": "BLEU", + "type": "metrics" + }, + { + "name": "BiLingual Evaluation Understudy", + "type": "metrics" + } + ] + }, + { + "sentence": "He holds fellowships in the American Association for Artificial Intelligence , the Center for Advanced Study in the Behavioral Sciences at Stanford University , the MIT Center for Cognitive Science , the Canadian Institute for Advanced Research , the Canadian Psychological Association , and was elected Fellow of the Royal Society of Canada in 1998 .", + "entity_list": [ + { + "name": "American Association for Artificial Intelligence", + "type": "conference" + }, + { + "name": "Center for Advanced Study in the Behavioral Sciences", + "type": "organization" + }, + { + "name": "Stanford University", + "type": "university" + }, + { + "name": "MIT", + "type": "university" + }, + { + "name": "Cognitive Science", + "type": "field" + }, + { + "name": "Canadian Institute for Advanced Research", + "type": "organization" + }, + { + "name": "Canadian Psychological Association", + "type": "organization" + }, + { + "name": "Royal Society of Canada", + "type": "organization" + } + ] + }, + { + "sentence": "Hinton - together with Yoshua Bengio and Yann LeCun - are referred to by some as the Godfathers of AI and Godfathers of Deep Learning .", + "entity_list": [ + { + "name": "Hinton", + "type": "researcher" + }, + { + "name": "Yoshua Bengio", + "type": "researcher" + }, + { + "name": "Yann LeCun", + "type": "researcher" + }, + { + "name": "Godfathers of AI", + "type": "else" + }, + { + "name": "Godfathers of Deep Learning", + "type": "else" + } + ] + }, + { + "sentence": "The lightweight open-source speech project eSpeak , which has its own approach to synthesis , has experimented with Mandarin and Cantonese. eSpeak was used by Google Translate from May 20102010 .", + "entity_list": [ + { + "name": "eSpeak", + "type": "product" + }, + { + "name": "Mandarin", + "type": "else" + }, + { + "name": "Cantonese.", + "type": "else" + }, + { + "name": "Google Translate", + "type": "product" + } + ] + }, + { + "sentence": "Also released in 1982 , Software Automatic Mouth was the first commercial all-software voice synthesis program .", + "entity_list": [ + { + "name": "Software Automatic Mouth", + "type": "product" + }, + { + "name": "synthesis program", + "type": "task" + } + ] + }, + { + "sentence": "The column ratios are TRUE Positive Rate ( TPR , aka Sensitivity or recall ) ( TP / ( TP + FN ) ) , with complement the FALSE Negative Rate ( FNR ) ( FN / ( TP + FN ) ) ; and TRUE Negative Rate ( TNR , aka Specificity , SPC ) ( TN / ( TN + FP ) ) , with complement FALSE Positive Rate ( FPR ) ( FP / ( TN + FP ) ) .", + "entity_list": [ + { + "name": "TRUE Positive Rate", + "type": "metrics" + }, + { + "name": "TPR", + "type": "metrics" + }, + { + "name": "Sensitivity", + "type": "metrics" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "TP / ( TP + FN )", + "type": "metrics" + }, + { + "name": "FALSE Negative Rate", + "type": "metrics" + }, + { + "name": "FNR", + "type": "metrics" + }, + { + "name": "FN / ( TP + FN )", + "type": "metrics" + }, + { + "name": "TRUE Negative Rate", + "type": "metrics" + }, + { + "name": "TNR", + "type": "metrics" + }, + { + "name": "Specificity", + "type": "metrics" + }, + { + "name": "SPC", + "type": "metrics" + }, + { + "name": "TN / ( TN + FP )", + "type": "metrics" + }, + { + "name": "FALSE Positive Rate", + "type": "metrics" + }, + { + "name": "FPR", + "type": "metrics" + }, + { + "name": "FP / ( TN + FP )", + "type": "metrics" + } + ] + }, + { + "sentence": "Edsinger and Weber collaborated on many other robots as well , and their experience working with the Kismet", + "entity_list": [ + { + "name": "Edsinger", + "type": "person" + }, + { + "name": "Weber", + "type": "organization" + } + ] + }, + { + "sentence": "R functionality is accessible from several scripting languages such as Python , are available as well .", + "entity_list": [ + { + "name": "R", + "type": "program language" + }, + { + "name": "Python", + "type": "program language" + } + ] + }, + { + "sentence": "VAL was one of the first robot languages and was used in Unimate robots .", + "entity_list": [ + { + "name": "VAL", + "type": "program language" + }, + { + "name": "Unimate robots", + "type": "product" + } + ] + }, + { + "sentence": "They presented their database for the first time as a poster at the 2009 Conference on Computer Vision and Pattern Recognition ( CVPR ) in Florida .", + "entity_list": [ + { + "name": "2009 Conference on Computer Vision and Pattern Recognition", + "type": "conference" + }, + { + "name": "CVPR", + "type": "conference" + }, + { + "name": "Florida", + "type": "location" + } + ] + }, + { + "sentence": "Categorization tasks in which no labels are supplied are referred to as unsupervised classification , unsupervised learning , Cluster analysis .", + "entity_list": [ + { + "name": "Categorization tasks", + "type": "else" + }, + { + "name": "unsupervised classification", + "type": "task" + }, + { + "name": "unsupervised learning", + "type": "field" + }, + { + "name": "Cluster analysis", + "type": "task" + } + ] + }, + { + "sentence": "It needs to Object recognition , recognize and locate humans and further emotion recognition .", + "entity_list": [ + { + "name": "Object recognition", + "type": "task" + }, + { + "name": "emotion recognition", + "type": "task" + } + ] + }, + { + "sentence": "The process is complex and contains encoding and recall or retrieval .", + "entity_list": [ + { + "name": "encoding", + "type": "else" + }, + { + "name": "recall", + "type": "else" + }, + { + "name": "retrieval", + "type": "else" + } + ] + }, + { + "sentence": "Also known as parallel robots , or generalized Stewart platforms ( in the Stewart platform , the actuators are paired together on both the basis and the platform ) , these systems are articulated robot s that use similar mechanisms for the movement of either the robot on its base , or one or more manipulator arms .", + "entity_list": [ + { + "name": "Stewart platforms", + "type": "product" + }, + { + "name": "Stewart platform", + "type": "product" + }, + { + "name": "articulated robot", + "type": "product" + } + ] + }, + { + "sentence": "Machine vision as a systems engineering discipline can be considered distinct from computer vision , a form of computer science .", + "entity_list": [ + { + "name": "Machine vision", + "type": "field" + }, + { + "name": "systems engineering", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "computer science", + "type": "field" + } + ] + }, + { + "sentence": "The activation function of the LSTM gates is often the logistic sigmoid function .", + "entity_list": [ + { + "name": "LSTM gates", + "type": "algorithm" + }, + { + "name": "logistic sigmoid function", + "type": "algorithm" + } + ] + }, + { + "sentence": "In other words , the sample mean is the ( necessarily unique ) efficient estimator , and thus also the minimum variance unbiased estimator ( MVUE ) , in addition to being the maximum likelihood estimator .", + "entity_list": [ + { + "name": "sample mean", + "type": "metrics" + }, + { + "name": "minimum variance unbiased estimator", + "type": "metrics" + }, + { + "name": "MVUE", + "type": "metrics" + }, + { + "name": "maximum likelihood estimator", + "type": "metrics" + } + ] + }, + { + "sentence": "The 2001 Scientific American article by Berners-Lee , James Hendler , and Ora Lassila described an expected evolution of the existing Web to a Semantic Web .", + "entity_list": [ + { + "name": "Scientific American", + "type": "else" + }, + { + "name": "Berners-Lee", + "type": "researcher" + }, + { + "name": "James Hendler", + "type": "researcher" + }, + { + "name": "Ora Lassila", + "type": "researcher" + }, + { + "name": "Web", + "type": "product" + }, + { + "name": "Semantic Web", + "type": "product" + } + ] + }, + { + "sentence": "Blade Runner used a number of then-lesser-known actors : Sean Young portrays Rachael , an experimental replicant implanted with the memories of Tyrell 's niece , causing her to believe she is human ; Sammon , pp. 92-93 Nina Axelrod auditioned for the role .", + "entity_list": [ + { + "name": "Blade Runner", + "type": "else" + }, + { + "name": "Sean Young", + "type": "person" + }, + { + "name": "Rachael", + "type": "person" + }, + { + "name": "Tyrell", + "type": "person" + }, + { + "name": "Sammon", + "type": "person" + }, + { + "name": "Nina Axelrod", + "type": "person" + } + ] + }, + { + "sentence": "Gerry Sussman , Eugene Charniak , Seymour Papert and Terry Winograd visited the University of Edinburgh in 1971 spreading the news about Micro-Planner and SHRDLU and casting doubt on the resolution uniform proof procedure approach that had been the mainstay of the Edinburgh Logicists .", + "entity_list": [ + { + "name": "Gerry Sussman", + "type": "researcher" + }, + { + "name": "Eugene Charniak", + "type": "researcher" + }, + { + "name": "Seymour Papert", + "type": "researcher" + }, + { + "name": "Terry Winograd", + "type": "researcher" + }, + { + "name": "University of Edinburgh", + "type": "university" + }, + { + "name": "Micro-Planner", + "type": "product" + }, + { + "name": "SHRDLU", + "type": "product" + }, + { + "name": "Edinburgh", + "type": "location" + } + ] + }, + { + "sentence": "Walter 's work inspired subsequent generations of robotics researchers such as Rodney Brooks , Hans Moravec and Mark Tilden .", + "entity_list": [ + { + "name": "Walter", + "type": "researcher" + }, + { + "name": "robotics", + "type": "field" + }, + { + "name": "Rodney Brooks", + "type": "researcher" + }, + { + "name": "Hans Moravec", + "type": "researcher" + }, + { + "name": "Mark Tilden", + "type": "researcher" + } + ] + }, + { + "sentence": "Subsequently , a similar GPU-based CNN by Alex Krizhevsky et al. won the ImageNet Large Scale Visual Recognition Challenge 2012 .", + "entity_list": [ + { + "name": "CNN", + "type": "algorithm" + }, + { + "name": "Alex Krizhevsky", + "type": "researcher" + }, + { + "name": "ImageNet Large Scale Visual Recognition Challenge 2012", + "type": "conference" + } + ] + }, + { + "sentence": "Commonly used loss functions for probabilistic classification include log loss and the Brier score between the predicted and the TRUE probability distributions .", + "entity_list": [ + { + "name": "loss functions", + "type": "else" + }, + { + "name": "log loss", + "type": "metrics" + }, + { + "name": "Brier score", + "type": "metrics" + }, + { + "name": "TRUE probability", + "type": "else" + } + ] + }, + { + "sentence": "In May 2016 , NtechLab was admitted to the official testing of biometrics technology by NIST among the three Russian companies .", + "entity_list": [ + { + "name": "NtechLab", + "type": "organization" + }, + { + "name": "biometrics", + "type": "field" + }, + { + "name": "NIST", + "type": "organization" + }, + { + "name": "Russian", + "type": "else" + } + ] + }, + { + "sentence": "However , floating-point numbers have only a certain amount of mathematical precision .", + "entity_list": [] + }, + { + "sentence": "During 2015 , many of SenseTime 's papers were accepted into the Conference on Computer Vision and Pattern Recognition ( CVPR ) .", + "entity_list": [ + { + "name": "SenseTime", + "type": "organization" + }, + { + "name": "Conference on Computer Vision and Pattern Recognition", + "type": "conference" + }, + { + "name": "CVPR", + "type": "conference" + } + ] + }, + { + "sentence": "He co-developed optimal algorithms for Structure From Motion ( SFM , or Visual SLAM , simultaneous localization and mapping , in Robotics ; Best Paper Award at Conference on Computer Vision and Pattern Recognition 1998 ) , characterized its ambiguities ( David Marr Prize at ICCV 1999 ) , also characterized the identifiability and observability of visual-inertial sensor fusion ( Best Paper Award at Robotics 2015 ) .", + "entity_list": [ + { + "name": "Structure From Motion", + "type": "task" + }, + { + "name": "SFM", + "type": "task" + }, + { + "name": "Visual SLAM", + "type": "task" + }, + { + "name": "simultaneous localization and mapping", + "type": "task" + }, + { + "name": "Robotics", + "type": "field" + }, + { + "name": "Best Paper Award", + "type": "else" + }, + { + "name": "Conference on Computer Vision and Pattern Recognition", + "type": "conference" + }, + { + "name": "David Marr Prize", + "type": "else" + }, + { + "name": "ICCV 1999", + "type": "conference" + } + ] + }, + { + "sentence": "Stephen H. Muggleton FBCS , FIET , Association for the Advancement of Artificial Intelligence ,", + "entity_list": [ + { + "name": "Stephen H. Muggleton", + "type": "researcher" + }, + { + "name": "FBCS", + "type": "organization" + }, + { + "name": "FIET", + "type": "organization" + }, + { + "name": "Association for the Advancement of Artificial Intelligence", + "type": "conference" + } + ] + }, + { + "sentence": "Edge detection is a fundamental tool in image processing , machine vision and computer vision , particularly in the areas of feature detection and feature extraction .", + "entity_list": [ + { + "name": "Edge detection", + "type": "task" + }, + { + "name": "image processing", + "type": "field" + }, + { + "name": "machine vision", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "feature detection", + "type": "task" + }, + { + "name": "feature extraction", + "type": "task" + } + ] + }, + { + "sentence": "An example of this would be a variable such as outside temperature ( mathtemp / math ) , which in a given application might be recorded to several decimal places of precision ( depending on the sensing apparatus ) .", + "entity_list": [ + { + "name": "outside temperature", + "type": "else" + }, + { + "name": "decimal places of precision", + "type": "else" + } + ] + }, + { + "sentence": "The returning judges are Fon Davis , Jessica Chobot , and Leland Melvin , as well as celebrity guest judges actor Clark Gregg , MythBusters host and former Battlebots builder Adam Savage , NFL tightend Vernon Davis , and YouTube star Michael Stevens a.k.a. Vsauce .", + "entity_list": [ + { + "name": "Fon Davis", + "type": "person" + }, + { + "name": "Jessica Chobot", + "type": "person" + }, + { + "name": "Leland Melvin", + "type": "person" + }, + { + "name": "Clark Gregg", + "type": "person" + }, + { + "name": "MythBusters", + "type": "else" + }, + { + "name": "Battlebots", + "type": "else" + }, + { + "name": "Adam Savage", + "type": "person" + }, + { + "name": "NFL", + "type": "organization" + }, + { + "name": "Vernon Davis", + "type": "person" + }, + { + "name": "YouTube", + "type": "organization" + }, + { + "name": "Michael Stevens", + "type": "person" + }, + { + "name": "Vsauce", + "type": "person" + } + ] + }, + { + "sentence": "But these methods never won over the non-uniform internal-handcrafting Gaussian mixture model / Hidden Markov model ( GMM-HMM ) technology based on generative models of speech trained discriminatively .", + "entity_list": [ + { + "name": "Gaussian mixture model", + "type": "algorithm" + }, + { + "name": "Hidden Markov model", + "type": "algorithm" + }, + { + "name": "GMM-HMM", + "type": "algorithm" + } + ] + }, + { + "sentence": "Software packages like MATLAB , GNU Octave , Scilab , and SciPy provide convenient ways to apply these different methods .", + "entity_list": [ + { + "name": "MATLAB", + "type": "product" + }, + { + "name": "GNU Octave", + "type": "program language" + }, + { + "name": "Scilab", + "type": "program language" + }, + { + "name": "SciPy", + "type": "product" + } + ] + }, + { + "sentence": "Linear predictive coding ( LPC ) , a speech processing algorithm , was first proposed by Fumitada Itakura of Nagoya University and Shuzo Saito of Nippon Telegraph and Telephone ( NTT ) in 1966 .", + "entity_list": [ + { + "name": "Linear predictive coding", + "type": "algorithm" + }, + { + "name": "LPC", + "type": "algorithm" + }, + { + "name": "speech processing", + "type": "task" + }, + { + "name": "Fumitada Itakura", + "type": "researcher" + }, + { + "name": "Nagoya University", + "type": "university" + }, + { + "name": "Shuzo Saito", + "type": "researcher" + }, + { + "name": "Nippon Telegraph and Telephone", + "type": "organization" + }, + { + "name": "NTT", + "type": "organization" + } + ] + }, + { + "sentence": "In 2006 , for the 25th anniversary of the algorithm , a workshop was organized at the International Conference on Computer Vision and Pattern Recognition ( CVPR ) to summarize the most recent contributions and variations to the original algorithm , mostly meant to improve the speed of the algorithm , the robustness and accuracy of the estimated solution and to decrease the dependency from user defined constants .", + "entity_list": [ + { + "name": "International Conference on Computer Vision and Pattern Recognition", + "type": "conference" + }, + { + "name": "CVPR", + "type": "conference" + } + ] + }, + { + "sentence": "The members went to the University of Debrecen , the Hungarian Academy of Sciences , Eötvös Loránd University , etc .", + "entity_list": [ + { + "name": "University of Debrecen", + "type": "university" + }, + { + "name": "Hungarian Academy of Sciences", + "type": "organization" + }, + { + "name": "Eötvös Loránd University", + "type": "university" + } + ] + }, + { + "sentence": "To extend SVM to cases in which the data are not linearly separable , we introduce the loss function ,", + "entity_list": [ + { + "name": "SVM", + "type": "algorithm" + }, + { + "name": "loss function", + "type": "else" + } + ] + }, + { + "sentence": "Logo is an educational programming language , designed in 1967 by Wally Feurzeig , Seymour Papert , and Cynthia Solomon .", + "entity_list": [ + { + "name": "Logo", + "type": "program language" + }, + { + "name": "Wally Feurzeig", + "type": "researcher" + }, + { + "name": "Seymour Papert", + "type": "researcher" + }, + { + "name": "Cynthia Solomon", + "type": "researcher" + } + ] + }, + { + "sentence": "Eyring Research Institute was instrumental to the U.S. Air Force Missile Directorate at Hill Air Force Base near Ogden , Utah to produce in top military secrecy , the Intelligent Systems Technology Software that was foundational to the later named Reagan Star Wars program .", + "entity_list": [ + { + "name": "Eyring Research Institute", + "type": "organization" + }, + { + "name": "U.S. Air Force Missile Directorate", + "type": "organization" + }, + { + "name": "Hill Air Force Base", + "type": "location" + }, + { + "name": "Ogden", + "type": "location" + }, + { + "name": "Utah", + "type": "location" + }, + { + "name": "Intelligent Systems Technology Software", + "type": "product" + }, + { + "name": "Reagan Star Wars program", + "type": "product" + } + ] + }, + { + "sentence": "Over the decades he has researched and developed emerging fields of computer science from compiler , programming languages and system architecture John F. Sowa and John Zachman ( 1992 ) .", + "entity_list": [ + { + "name": "computer science", + "type": "field" + }, + { + "name": "John F. Sowa", + "type": "researcher" + }, + { + "name": "John Zachman", + "type": "researcher" + } + ] + }, + { + "sentence": "The Sobel operator , sometimes called the Sobel-Feldman operator or Sobel filter , is used in image processing and computer vision , particularly within edge detection algorithms where it creates an image emphasising edges .", + "entity_list": [ + { + "name": "Sobel operator", + "type": "algorithm" + }, + { + "name": "Sobel-Feldman operator", + "type": "algorithm" + }, + { + "name": "Sobel filter", + "type": "algorithm" + }, + { + "name": "image processing", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "edge detection algorithms", + "type": "else" + } + ] + }, + { + "sentence": "LDA is a supervised learning algorithm that utilizes the labels of the data , while PCA is an learning algorithm that ignores the labels .", + "entity_list": [ + { + "name": "LDA", + "type": "algorithm" + }, + { + "name": "supervised learning", + "type": "field" + }, + { + "name": "PCA", + "type": "algorithm" + } + ] + }, + { + "sentence": "Other linear classification algorithms include Winnow , support vector machine and logistic regression .", + "entity_list": [ + { + "name": "Winnow", + "type": "algorithm" + }, + { + "name": "support vector machine", + "type": "algorithm" + }, + { + "name": "logistic regression", + "type": "algorithm" + } + ] + }, + { + "sentence": "VTK consists of a C + + class library and several interpreted interface layers including Tcl / Tk , Java , and Python .", + "entity_list": [ + { + "name": "VTK", + "type": "product" + }, + { + "name": "C + +", + "type": "program language" + }, + { + "name": "Tcl / Tk", + "type": "product" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "Python", + "type": "program language" + } + ] + }, + { + "sentence": "Also , text produced by processing spontaneous speech using automatic speech recognition and printed or handwritten text using optical character recognition contains processing noise .", + "entity_list": [ + { + "name": "automatic speech recognition", + "type": "task" + }, + { + "name": "optical character recognition", + "type": "task" + } + ] + }, + { + "sentence": "Miller wrote several books and directed the development of WordNet , an online word-linkage database usable by computer programs .", + "entity_list": [ + { + "name": "Miller", + "type": "researcher" + }, + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "Contemporary automata are represented by the works of Cabaret Mechanical Theatre in the United Kingdom , Dug North and Chomick + Meder , Arthur Ganson , Joe Jones in the United States , Le Défenseur du Temps by French artist Jacques Monestier , and François Junod in Switzerland .", + "entity_list": [ + { + "name": "automata", + "type": "field" + }, + { + "name": "Cabaret Mechanical Theatre", + "type": "organization" + }, + { + "name": "United Kingdom", + "type": "country" + }, + { + "name": "Dug North", + "type": "person" + }, + { + "name": "Chomick + Meder", + "type": "person" + }, + { + "name": "Arthur Ganson", + "type": "person" + }, + { + "name": "Joe Jones", + "type": "person" + }, + { + "name": "United States", + "type": "country" + }, + { + "name": "Le Défenseur du Temps", + "type": "location" + }, + { + "name": "French", + "type": "else" + }, + { + "name": "Jacques Monestier", + "type": "person" + }, + { + "name": "François Junod", + "type": "person" + }, + { + "name": "Switzerland", + "type": "country" + } + ] + }, + { + "sentence": "MATLAB does include standard codefor / code and codewhile / code loops , but ( as in other similar applications such as R ) , using the vectorized notation is encouraged and is often faster to execute .", + "entity_list": [ + { + "name": "MATLAB", + "type": "product" + }, + { + "name": "R", + "type": "program language" + } + ] + }, + { + "sentence": "Pausch received two awards from Association for Computing Machinery in 2007 for his achievements in computing education : the Karl V. Karlstrom Outstanding Educator Award and the ACM SIGCSE Award for Outstanding Contributions to Computer Science Education .", + "entity_list": [ + { + "name": "Pausch", + "type": "researcher" + }, + { + "name": "Association for Computing Machinery", + "type": "conference" + }, + { + "name": "computing education", + "type": "field" + }, + { + "name": "Karl V. Karlstrom Outstanding Educator Award", + "type": "else" + }, + { + "name": "ACM SIGCSE Award for Outstanding Contributions to Computer Science Education", + "type": "else" + } + ] + }, + { + "sentence": "In 1960 , Devol personally sold the first Unimate robot , which was shipped in 1961 to General Motors .", + "entity_list": [ + { + "name": "Devol", + "type": "person" + }, + { + "name": "Unimate", + "type": "product" + }, + { + "name": "robot", + "type": "product" + }, + { + "name": "General Motors", + "type": "organization" + } + ] + }, + { + "sentence": "Semantic networks are used in natural language processing applications such as semantic parsing .", + "entity_list": [ + { + "name": "Semantic networks", + "type": "algorithm" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "semantic parsing", + "type": "task" + } + ] + }, + { + "sentence": "Some successful applications of deep learning are computer vision and speech recognition . Honglak Lee , Roger Grosse , Rajesh Ranganath , Andrew Y. Ng .", + "entity_list": [ + { + "name": "deep learning", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "Honglak Lee", + "type": "researcher" + }, + { + "name": "Roger Grosse", + "type": "researcher" + }, + { + "name": "Rajesh Ranganath", + "type": "researcher" + }, + { + "name": "Andrew Y. Ng", + "type": "researcher" + } + ] + }, + { + "sentence": "In addition to maintaining the Discovery One spacecraft systems during the interplanetary mission to Jupiter ( or Saturn in the novel ) , HAL is capable of speech synthesis , speech recognition , facial recognition , natural language processing , lip reading , art appreciation , Affective computing , automated reasoning , spacecraft piloting and playing chess .", + "entity_list": [ + { + "name": "Discovery One spacecraft systems", + "type": "product" + }, + { + "name": "Jupiter", + "type": "else" + }, + { + "name": "Saturn", + "type": "else" + }, + { + "name": "HAL", + "type": "product" + }, + { + "name": "speech synthesis", + "type": "task" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "facial recognition", + "type": "task" + }, + { + "name": "natural language processing", + "type": "field" + }, + { + "name": "lip reading", + "type": "task" + }, + { + "name": "art appreciation", + "type": "field" + }, + { + "name": "Affective computing", + "type": "task" + }, + { + "name": "automated reasoning", + "type": "task" + }, + { + "name": "spacecraft piloting", + "type": "task" + }, + { + "name": "playing chess", + "type": "task" + } + ] + }, + { + "sentence": "Dr. Julesz emigrated from Hungary to the United States following the 1956 Soviet invasion .", + "entity_list": [ + { + "name": "Dr. Julesz", + "type": "researcher" + }, + { + "name": "Hungary", + "type": "country" + }, + { + "name": "the United States", + "type": "country" + }, + { + "name": "Soviet", + "type": "country" + } + ] + }, + { + "sentence": "Sigmoid function activation functions use a second non-linearity for large inputs : math \\ phi ( v _ i ) = ( 1 + \\ exp ( -v _ i ) ) ^ { -1 } / math .", + "entity_list": [ + { + "name": "Sigmoid function", + "type": "algorithm" + } + ] + }, + { + "sentence": "These probabilities are used to determine what the target is using a maximum likelihood decision .", + "entity_list": [ + { + "name": "maximum likelihood decision", + "type": "algorithm" + } + ] + }, + { + "sentence": "In 1984 he moved to the University of Konstanz and in 1990 to the University of Salzburg .", + "entity_list": [ + { + "name": "University of Konstanz", + "type": "university" + }, + { + "name": "University of Salzburg", + "type": "university" + } + ] + }, + { + "sentence": "Some popular fitness functions based on the confusion matrix include sensitivity / specificity , recall / precision , F-measure , Jaccard similarity , Matthews correlation coefficient , and cost / gain matrix which combines the costs and gains assigned to the 4 different types of classifications .", + "entity_list": [ + { + "name": "confusion matrix", + "type": "metrics" + }, + { + "name": "sensitivity / specificity", + "type": "metrics" + }, + { + "name": "recall / precision", + "type": "metrics" + }, + { + "name": "F-measure", + "type": "metrics" + }, + { + "name": "Jaccard similarity", + "type": "metrics" + }, + { + "name": "Matthews correlation coefficient", + "type": "metrics" + }, + { + "name": "cost / gain matrix", + "type": "metrics" + } + ] + }, + { + "sentence": "Common numerical programming environments such as MATLAB , SciLab , NumPy , Sklearn and the R language provide some of the simpler feature extraction techniques ( e.g. principal component analysis ) via built-in commands .", + "entity_list": [ + { + "name": "MATLAB", + "type": "product" + }, + { + "name": "SciLab", + "type": "product" + }, + { + "name": "NumPy", + "type": "product" + }, + { + "name": "Sklearn", + "type": "product" + }, + { + "name": "R language", + "type": "program language" + }, + { + "name": "principal component analysis", + "type": "algorithm" + } + ] + }, + { + "sentence": "Industrial robots have been implemented to collaborate with humans to perform industrial manufacturing tasks .", + "entity_list": [ + { + "name": "Industrial robots", + "type": "product" + } + ] + }, + { + "sentence": "In the first published paper on CGs , John F. Sowa applied them to a wide range of topics in artificial intelligence , computer science , and cognitive science .", + "entity_list": [ + { + "name": "CGs", + "type": "field" + }, + { + "name": "John F. Sowa", + "type": "researcher" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "computer science", + "type": "field" + }, + { + "name": "cognitive science", + "type": "field" + } + ] + }, + { + "sentence": "NIST also differs from BLEU in its calculation of the brevity penalty , insofar as small variations in translation length do not impact the overall score as much .", + "entity_list": [ + { + "name": "NIST", + "type": "metrics" + }, + { + "name": "BLEU", + "type": "metrics" + }, + { + "name": "brevity penalty", + "type": "else" + } + ] + }, + { + "sentence": "The IJCAI Award for Research Excellence is a biannual award given at the IJCAI conference to researcher in artificial intelligence as a recognition of excellence of their career .", + "entity_list": [ + { + "name": "IJCAI Award for Research Excellence", + "type": "else" + }, + { + "name": "IJCAI", + "type": "conference" + }, + { + "name": "artificial intelligence", + "type": "field" + } + ] + }, + { + "sentence": "Lenat was one of the original Fellows of the AAAI , and is the only individual to have on the Scientific Advisory Boards of both Microsoft and Apple .", + "entity_list": [ + { + "name": "Lenat", + "type": "researcher" + }, + { + "name": "AAAI", + "type": "conference" + }, + { + "name": "Scientific Advisory Boards of both Microsoft and Apple", + "type": "organization" + } + ] + }, + { + "sentence": "Autoencoders are trained to minimise reconstruction errors ( such as Mean squared error ) , often referred to as the loss :", + "entity_list": [ + { + "name": "Autoencoders", + "type": "algorithm" + }, + { + "name": "Mean squared error", + "type": "metrics" + }, + { + "name": "loss", + "type": "else" + } + ] + }, + { + "sentence": "An alternative to the use of the definitions is to consider general word-sense relatedness and to compute the similarity of each pair of word senses based on a given lexical knowledge base such as WordNet .", + "entity_list": [ + { + "name": "lexical knowledge base", + "type": "else" + }, + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "TD-Lambda is a learning algorithm invented by Richard S. Sutton based on earlier work on temporal difference learning by Arthur Samuel .", + "entity_list": [ + { + "name": "TD-Lambda", + "type": "algorithm" + }, + { + "name": "Richard S. Sutton", + "type": "researcher" + }, + { + "name": "Arthur Samuel", + "type": "researcher" + } + ] + }, + { + "sentence": "In data mining and statistics , hierarchical clustering ( also called hierarchical cluster analysis or HCA ) is a method of cluster analysis which seeks to build a hierarchy of clusters .", + "entity_list": [ + { + "name": "data mining", + "type": "field" + }, + { + "name": "statistics", + "type": "field" + }, + { + "name": "hierarchical clustering", + "type": "task" + }, + { + "name": "hierarchical cluster analysis", + "type": "task" + }, + { + "name": "HCA", + "type": "task" + }, + { + "name": "cluster analysis", + "type": "task" + } + ] + }, + { + "sentence": "The concept of deconvolution is widely used in the techniques of signal processing and image processing .", + "entity_list": [ + { + "name": "deconvolution", + "type": "algorithm" + }, + { + "name": "signal processing", + "type": "field" + }, + { + "name": "image processing", + "type": "field" + } + ] + }, + { + "sentence": "Cognitive maps serve the construction and accumulation of spatial knowledge , allowing the mind 's eye to visualize images in order to reduce cognitive load , enhance recall and learning of information .", + "entity_list": [ + { + "name": "Cognitive maps", + "type": "algorithm" + }, + { + "name": "cognitive load", + "type": "else" + }, + { + "name": "recall", + "type": "metrics" + } + ] + }, + { + "sentence": ", typically providing bindings to languages such as Python , C + + , Java ) .", + "entity_list": [ + { + "name": "Python", + "type": "program language" + }, + { + "name": "C + +", + "type": "program language" + }, + { + "name": "Java", + "type": "program language" + } + ] + }, + { + "sentence": "A voice-user interface ( VUI ) makes spoken human interaction with computers possible , using speech recognition to understand spoken commands and Question answering , and typically text to speech to play a reply .", + "entity_list": [ + { + "name": "voice-user interface", + "type": "product" + }, + { + "name": "VUI", + "type": "product" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "Question answering", + "type": "task" + }, + { + "name": "text to speech", + "type": "task" + } + ] + }, + { + "sentence": "Jess is a rule engine for the Java platform that was developed by Ernest Friedman-Hill of Sandia National .", + "entity_list": [ + { + "name": "Jess", + "type": "program language" + }, + { + "name": "rule engine", + "type": "else" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "Ernest Friedman-Hill", + "type": "researcher" + }, + { + "name": "Sandia National", + "type": "organization" + } + ] + }, + { + "sentence": "For multilayer perceptron s , where a hidden layer exists , more sophisticated algorithms such as backpropagation must be used .", + "entity_list": [ + { + "name": "multilayer perceptron", + "type": "algorithm" + }, + { + "name": "backpropagation", + "type": "algorithm" + } + ] + }, + { + "sentence": "Google Translate 's neural machine translation system uses a large end-to-end artificial neural network that attempts to perform deep learning , in particular , long short-term memory networks .", + "entity_list": [ + { + "name": "Google Translate", + "type": "product" + }, + { + "name": "neural machine translation system", + "type": "product" + }, + { + "name": "end-to-end artificial neural network", + "type": "algorithm" + }, + { + "name": "deep learning", + "type": "field" + }, + { + "name": "long short-term memory networks", + "type": "algorithm" + } + ] + }, + { + "sentence": "Various methods for doing so were developed in the 1980s and early 1990s by Werbos , Williams , Robinson , Jürgen Schmidhuber , Sepp Hochreiter , Pearlmutter and others .", + "entity_list": [ + { + "name": "Werbos", + "type": "researcher" + }, + { + "name": "Williams", + "type": "researcher" + }, + { + "name": "Robinson", + "type": "researcher" + }, + { + "name": "Jürgen Schmidhuber", + "type": "researcher" + }, + { + "name": "Sepp Hochreiter", + "type": "researcher" + }, + { + "name": "Pearlmutter", + "type": "researcher" + } + ] + }, + { + "sentence": "| Apple Apple Inc originally licensed software from Nuance to provide speech recognition capability to its digital assistant Siri .", + "entity_list": [ + { + "name": "Apple", + "type": "organization" + }, + { + "name": "Apple Inc", + "type": "organization" + }, + { + "name": "Nuance", + "type": "organization" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "Siri", + "type": "product" + } + ] + }, + { + "sentence": "Columbia released several 3D westerns produced by Sam Katzman and directed by William Castle .", + "entity_list": [ + { + "name": "Columbia", + "type": "organization" + }, + { + "name": "3D westerns", + "type": "else" + }, + { + "name": "Sam Katzman", + "type": "person" + }, + { + "name": "William Castle", + "type": "person" + } + ] + }, + { + "sentence": "It incorporates knowledge and research in the computer science , linguistics and computer engineering fields .", + "entity_list": [ + { + "name": "computer science", + "type": "field" + }, + { + "name": "linguistics", + "type": "field" + }, + { + "name": "computer engineering", + "type": "field" + } + ] + }, + { + "sentence": "Here is an example of R code :", + "entity_list": [ + { + "name": "R", + "type": "program language" + } + ] + }, + { + "sentence": "The ROC curve is created by plotting the TRUE positive rate ( TPR ) against the FALSE positive rate ( FPR ) at various threshold settings .", + "entity_list": [ + { + "name": "ROC curve", + "type": "metrics" + }, + { + "name": "TRUE positive rate", + "type": "metrics" + }, + { + "name": "TPR", + "type": "metrics" + }, + { + "name": "FALSE positive rate", + "type": "metrics" + }, + { + "name": "FPR", + "type": "metrics" + } + ] + }, + { + "sentence": "Research stagnated after machine learning research by Marvin Minsky and Seymour Papert ( 1969 ) ,", + "entity_list": [ + { + "name": "machine learning", + "type": "field" + }, + { + "name": "Marvin Minsky", + "type": "researcher" + }, + { + "name": "Seymour Papert", + "type": "researcher" + } + ] + }, + { + "sentence": "Other programming environments that are used to build DAQ applications include ladder logic , Visual C + + , Visual Basic , LabVIEW , and MATLAB .", + "entity_list": [ + { + "name": "DAQ", + "type": "task" + }, + { + "name": "ladder logic", + "type": "program language" + }, + { + "name": "Visual C + +", + "type": "product" + }, + { + "name": "Visual Basic", + "type": "program language" + }, + { + "name": "LabVIEW", + "type": "product" + }, + { + "name": "MATLAB", + "type": "product" + } + ] + }, + { + "sentence": "The metric was designed to fix some of the problems found in the more popular BLEU metric , and also produce good correlation with human judgement at the sentence or segment level .", + "entity_list": [ + { + "name": "BLEU metric", + "type": "metrics" + } + ] + }, + { + "sentence": "Techniques such as dynamic Markov Networks , Convolutional neural network and Long short-term memory are often employed to exploit the semantic correlations between consecutive video frames .", + "entity_list": [ + { + "name": "dynamic Markov Networks", + "type": "algorithm" + }, + { + "name": "Convolutional neural network", + "type": "algorithm" + }, + { + "name": "Long short-term memory", + "type": "algorithm" + } + ] + }, + { + "sentence": "Mass-produced printed circuit board s ( PCBs ) are almost exclusively manufactured by pick-and-place robots , typically with SCARA manipulators , which remove tiny electronic component s from strips or trays , and place them on to PCBs with great accuracy .", + "entity_list": [ + { + "name": "printed circuit board", + "type": "product" + }, + { + "name": "PCBs", + "type": "product" + }, + { + "name": "pick-and-place robots", + "type": "product" + }, + { + "name": "SCARA", + "type": "product" + } + ] + }, + { + "sentence": "In the context of machine learning , where it is most widely applied today , LDA was rediscovered independently by David Blei , Andrew Ng and Michael I. Jordan in 2003 , and presented as a graphical model for topic discovery .", + "entity_list": [ + { + "name": "machine learning", + "type": "field" + }, + { + "name": "LDA", + "type": "algorithm" + }, + { + "name": "David Blei", + "type": "researcher" + }, + { + "name": "Andrew Ng", + "type": "researcher" + }, + { + "name": "Michael I. Jordan", + "type": "researcher" + }, + { + "name": "graphical model", + "type": "algorithm" + }, + { + "name": "topic discovery", + "type": "task" + } + ] + }, + { + "sentence": "The measured performance on test data of eight naive WSI across various tauopathies resulted in the recall , precision , and an F1 score of 0.92 , 0.72 , and 0.81 , respectively .", + "entity_list": [ + { + "name": "WSI", + "type": "task" + }, + { + "name": "tauopathies", + "type": "else" + }, + { + "name": "recall", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "F1 score", + "type": "metrics" + } + ] + }, + { + "sentence": "With the help of advanced AR technologies ( e.g. adding computer vision , incorporating AR cameras into smartphone and object recognition ) the information about the surrounding real world of the user becomes interactive and digitally manipulated .", + "entity_list": [ + { + "name": "AR", + "type": "field" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "object recognition", + "type": "task" + } + ] + }, + { + "sentence": "In 2014 , Schmidhuber formed a company , Nnaisense , to work on commercial applications of artificial intelligence in fields such as finance , heavy industry and self-driving car s .", + "entity_list": [ + { + "name": "Schmidhuber", + "type": "researcher" + }, + { + "name": "Nnaisense", + "type": "organization" + }, + { + "name": "artificial intelligence", + "type": "field" + }, + { + "name": "self-driving car", + "type": "product" + } + ] + }, + { + "sentence": "Not only does this alter the performance of all subsequent tests on the retained explanatory model , it may introduce bias and alter mean square error in estimation .", + "entity_list": [ + { + "name": "mean square error", + "type": "metrics" + } + ] + }, + { + "sentence": "Bigrams are used in most successful language model s for speech recognition .", + "entity_list": [ + { + "name": "Bigrams", + "type": "else" + }, + { + "name": "language model", + "type": "algorithm" + }, + { + "name": "speech recognition", + "type": "task" + } + ] + }, + { + "sentence": "His research in cognitive psychology has won the Early Career Award ( 1984 ) and Boyd McCandless Award 1986 ) from the American Psychological Association , the Troland Research Award ( 1993 ) from the National Academy of Sciences , the Henry Dale Prize ( 2004 ) from the Royal Institution of Great Britain , and the George Miller Prize ( 2010 ) from the Cognitive Neuroscience Society .", + "entity_list": [ + { + "name": "cognitive psychology", + "type": "field" + }, + { + "name": "Early Career Award", + "type": "else" + }, + { + "name": "Boyd McCandless Award", + "type": "else" + }, + { + "name": "American Psychological Association", + "type": "organization" + }, + { + "name": "Troland Research Award", + "type": "else" + }, + { + "name": "National Academy of Sciences", + "type": "organization" + }, + { + "name": "Henry Dale Prize", + "type": "else" + }, + { + "name": "Royal Institution of Great Britain", + "type": "organization" + }, + { + "name": "George Miller Prize", + "type": "else" + }, + { + "name": "Cognitive Neuroscience Society", + "type": "organization" + } + ] + }, + { + "sentence": "An eigenface ( The approach of using eigenfaces for Facial recognition system was developed by Sirovich and Kirby ( 1987 ) and used by Matthew Turk and Alex Pentland in face classification . Turk , Matthew A and Pentland , Alex P. Face recognition using eigenfaces .", + "entity_list": [ + { + "name": "eigenface", + "type": "else" + }, + { + "name": "eigenfaces", + "type": "else" + }, + { + "name": "Facial recognition system", + "type": "product" + }, + { + "name": "Sirovich", + "type": "researcher" + }, + { + "name": "Kirby", + "type": "researcher" + }, + { + "name": "Matthew Turk", + "type": "researcher" + }, + { + "name": "Alex Pentland", + "type": "researcher" + }, + { + "name": "face classification", + "type": "task" + }, + { + "name": "Turk , Matthew A", + "type": "researcher" + }, + { + "name": "Pentland , Alex P.", + "type": "researcher" + }, + { + "name": "Face recognition", + "type": "task" + } + ] + }, + { + "sentence": "A lexical dictionary such as WordNet can then be used for understanding the context .", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "Hyponymy is the most frequently encoded relation among synsets used in lexical databases such as WordNet .", + "entity_list": [ + { + "name": "Hyponymy", + "type": "else" + }, + { + "name": "synsets", + "type": "else" + }, + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "OPeNDAP offers open-source libraries in C + + and Java , but many clients rely on community developed libraries such as libraries include embedded capabilities for retrieving ( array-style ) data from DAP servers .", + "entity_list": [ + { + "name": "OPeNDAP", + "type": "organization" + }, + { + "name": "C + +", + "type": "program language" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "DAP", + "type": "else" + } + ] + }, + { + "sentence": "In that page , Samurai Damashii exaggerated the Senkousha as the crystallization of China 's four thousand years of scientific knowledge , commented on the crude design ( e.g. the Chinese Cannon on its crotch ) , and put its image among images of Honda ' s ASIMO and Sony ' s QRIO SDR-3X for juxtaposition .", + "entity_list": [ + { + "name": "Samurai Damashii", + "type": "else" + }, + { + "name": "Senkousha", + "type": "product" + }, + { + "name": "China", + "type": "country" + }, + { + "name": "Chinese Cannon", + "type": "else" + }, + { + "name": "Honda", + "type": "organization" + }, + { + "name": "ASIMO", + "type": "product" + }, + { + "name": "Sony", + "type": "organization" + }, + { + "name": "QRIO SDR-3X", + "type": "product" + } + ] + }, + { + "sentence": "There are also many programming libraries that contain neural network functionality and that can be used in custom implementations ( such as TensorFlow , Theano , etc .", + "entity_list": [ + { + "name": "neural network", + "type": "algorithm" + }, + { + "name": "TensorFlow", + "type": "product" + }, + { + "name": "Theano", + "type": "product" + } + ] + }, + { + "sentence": "He is a Fellow of the Association for Computing Machinery , IEEE , American Association for the Advancement of Science , IAPR and SPIE .", + "entity_list": [ + { + "name": "Association for Computing Machinery", + "type": "conference" + }, + { + "name": "IEEE", + "type": "organization" + }, + { + "name": "American Association for the Advancement of Science", + "type": "conference" + }, + { + "name": "IAPR", + "type": "conference" + }, + { + "name": "SPIE", + "type": "conference" + } + ] + }, + { + "sentence": "A trial by RET in 2011 with Facial recognition system cameras mounted on trams made sure that people were banned from the city trams did not sneak on anyway .", + "entity_list": [ + { + "name": "RET", + "type": "organization" + }, + { + "name": "Facial recognition system", + "type": "product" + } + ] + }, + { + "sentence": "The film , adapted from the popular Cole Porter Broadway musical , starred the MGM songbird team of Howard Keel and Kathryn Grayson as the leads , supported by Ann Miller , Keenan Wynn , Bobby Van , James Whitmore , Kurt Kasznar and Tommy Rall .", + "entity_list": [ + { + "name": "Cole Porter", + "type": "person" + }, + { + "name": "Broadway", + "type": "organization" + }, + { + "name": "Howard Keel", + "type": "person" + }, + { + "name": "Kathryn Grayson", + "type": "person" + }, + { + "name": "Ann Miller", + "type": "person" + }, + { + "name": "Keenan Wynn", + "type": "person" + }, + { + "name": "Bobby Van", + "type": "person" + }, + { + "name": "James Whitmore", + "type": "person" + }, + { + "name": "Kurt Kasznar", + "type": "person" + }, + { + "name": "Tommy Rall", + "type": "person" + } + ] + }, + { + "sentence": "Such applications should streamline the call flows , minimize prompts , eliminate unnecessary iterations and allow elaborate mixed initiative dialog system , which enable callers to enter several pieces of information in a single utterance and in any order or combination .", + "entity_list": [ + { + "name": "mixed initiative dialog system", + "type": "product" + } + ] + }, + { + "sentence": "As such , traditional gradient descent ( or Stochastic gradient descent ) methods can be adapted , where of taking a step in the direction of the function 's gradient , a step is taken in the direction of a vector selected from the function 's sub-gradient .", + "entity_list": [ + { + "name": "gradient descent", + "type": "algorithm" + }, + { + "name": "Stochastic gradient descent", + "type": "algorithm" + } + ] + }, + { + "sentence": "If it is assumed that distortion is measured by mean squared error , the distortion D , is given by :", + "entity_list": [ + { + "name": "mean squared error", + "type": "metrics" + }, + { + "name": "distortion D", + "type": "else" + } + ] + }, + { + "sentence": "MLPs were a popular machine learning solution in the 1980s , finding applications in diverse fields such as speech recognition , image recognition , and machine translation software , Neural networks .", + "entity_list": [ + { + "name": "MLPs", + "type": "algorithm" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "image recognition", + "type": "task" + }, + { + "name": "machine translation", + "type": "task" + }, + { + "name": "Neural networks", + "type": "product" + } + ] + }, + { + "sentence": "Allen received his Ph.D. from the University of Toronto in 1979 , under the supervision of C. Raymond Perrault ,", + "entity_list": [ + { + "name": "Allen", + "type": "researcher" + }, + { + "name": "University of Toronto", + "type": "university" + }, + { + "name": "C. Raymond Perrault", + "type": "researcher" + } + ] + }, + { + "sentence": "OpenCV supports some models from deep learning frameworks like TensorFlow , Torch , PyTorch ( after converting to an ONNX model ) and Caffe according to a defined list of supported layers .", + "entity_list": [ + { + "name": "OpenCV", + "type": "product" + }, + { + "name": "deep learning", + "type": "field" + }, + { + "name": "TensorFlow", + "type": "product" + }, + { + "name": "Torch", + "type": "product" + }, + { + "name": "PyTorch", + "type": "product" + }, + { + "name": "ONNX", + "type": "product" + }, + { + "name": "Caffe", + "type": "product" + } + ] + }, + { + "sentence": "Previously , Christensen was the Founding Chairman of European Robotics Research Network ( EURON ) and an IEEE Robotics and Automation Society Distinguished Lecturer in Robotics .", + "entity_list": [ + { + "name": "Christensen", + "type": "researcher" + }, + { + "name": "European Robotics Research Network", + "type": "organization" + }, + { + "name": "EURON", + "type": "organization" + }, + { + "name": "IEEE Robotics and Automation Society", + "type": "organization" + }, + { + "name": "Robotics", + "type": "field" + } + ] + }, + { + "sentence": "He received his master 's degree in mathematics from the Samarkand State University , Samarkand , Uzbek Soviet Socialist Republic in 1958 and Ph.D in statistics at the Institute of Control Sciences , Moscow in 1964 .", + "entity_list": [ + { + "name": "mathematics", + "type": "field" + }, + { + "name": "Samarkand State University", + "type": "university" + }, + { + "name": "Samarkand", + "type": "location" + }, + { + "name": "Uzbek Soviet Socialist Republic", + "type": "country" + }, + { + "name": "Ph.D", + "type": "else" + }, + { + "name": "statistics", + "type": "field" + }, + { + "name": "Institute of Control Sciences", + "type": "organization" + }, + { + "name": "Moscow", + "type": "location" + } + ] + }, + { + "sentence": "Increasingly , however , work at Cycorp involves giving the Cyc system the ability to communicate with end users in natural language , and to assist with the ongoing knowledge formation process via machine learning and natural language understanding .", + "entity_list": [ + { + "name": "Cycorp", + "type": "organization" + }, + { + "name": "Cyc system", + "type": "product" + }, + { + "name": "machine learning", + "type": "field" + }, + { + "name": "natural language understanding", + "type": "task" + } + ] + }, + { + "sentence": "For example , if the most suitable classifier for the problem is sought , the training dataset is used to train the candidate algorithms , the validation dataset is used to compare their performances and decide which one to take and , finally , the test dataset is used to obtain the performance characteristics such as accuracy , sensitivity , specificity , F-measure , and so on .", + "entity_list": [ + { + "name": "accuracy", + "type": "metrics" + }, + { + "name": "sensitivity", + "type": "metrics" + }, + { + "name": "specificity", + "type": "metrics" + }, + { + "name": "F-measure", + "type": "metrics" + } + ] + }, + { + "sentence": "The Mean squared error is 0.15 .", + "entity_list": [ + { + "name": "Mean squared error", + "type": "metrics" + } + ] + }, + { + "sentence": "In 1979 a Micromouse competition was organized by the IEEE as shown in the Spectrum magazine .", + "entity_list": [ + { + "name": "Micromouse competition", + "type": "else" + }, + { + "name": "IEEE", + "type": "organization" + }, + { + "name": "Spectrum", + "type": "else" + } + ] + }, + { + "sentence": "The Gabor space is very useful in image processing applications such as optical character recognition , iris recognition and fingerprint recognition .", + "entity_list": [ + { + "name": "Gabor space", + "type": "algorithm" + }, + { + "name": "image processing", + "type": "field" + }, + { + "name": "optical character recognition", + "type": "task" + }, + { + "name": "iris recognition", + "type": "task" + }, + { + "name": "fingerprint recognition", + "type": "task" + } + ] + }, + { + "sentence": "or via high-level interfaces to Java and Tcl .", + "entity_list": [ + { + "name": "Java", + "type": "program language" + }, + { + "name": "Tcl", + "type": "program language" + } + ] + }, + { + "sentence": "In recent research , kernel-based methods such as support vector machine s have shown superior performance in supervised .", + "entity_list": [ + { + "name": "support vector machine", + "type": "algorithm" + }, + { + "name": "supervised", + "type": "field" + } + ] + }, + { + "sentence": "To illustrate the basic principles of bagging , below is an analysis on the relationship between ozone and temperature ( data from Rousseeuw and Leroy ( 1986 ) , analysis done in R ) .", + "entity_list": [ + { + "name": "ozone", + "type": "else" + }, + { + "name": "Rousseeuw", + "type": "researcher" + }, + { + "name": "Leroy", + "type": "researcher" + }, + { + "name": "R", + "type": "program language" + } + ] + }, + { + "sentence": "Denso Wave is a subsidiary that produces automatic identification products ( bar-code reader s and related products ) , industrial robot s and programmable logic controller s .", + "entity_list": [ + { + "name": "Denso Wave", + "type": "organization" + }, + { + "name": "bar-code reader", + "type": "product" + }, + { + "name": "industrial robot", + "type": "product" + }, + { + "name": "programmable logic controller", + "type": "product" + } + ] + }, + { + "sentence": "Where Bilingual evaluation understudy simply calculates n-gram precision adding equal weight to each one , NIST also calculates how informative a particular n-gram is .", + "entity_list": [ + { + "name": "Bilingual evaluation understudy", + "type": "metrics" + }, + { + "name": "n-gram precision", + "type": "metrics" + }, + { + "name": "NIST", + "type": "metrics" + }, + { + "name": "n-gram", + "type": "else" + } + ] + }, + { + "sentence": "In particular , they are used during the calculation of likelihood of a tree ( in Bayesian and maximum likelihood approaches to tree estimation ) and they are used to estimate the evolutionary distance between sequences from the observed differences between the sequences .", + "entity_list": [ + { + "name": "Bayesian", + "type": "algorithm" + }, + { + "name": "maximum likelihood", + "type": "algorithm" + } + ] + }, + { + "sentence": "The Audio Engineering Society recommends 48 kHz sampling rate for most applications but gives recognition to 44.1 kHz for Compact Disc ( CD ) and other consumer uses , 32 kHz for transmission-related applications , and 96 kHz for higher bandwidth or relaxed anti-aliasing filter ing .", + "entity_list": [ + { + "name": "Audio Engineering Society", + "type": "conference" + }, + { + "name": "Compact Disc", + "type": "else" + }, + { + "name": "CD", + "type": "else" + }, + { + "name": "anti-aliasing filter", + "type": "else" + } + ] + }, + { + "sentence": "Resources for affectivity of words and concepts have been made for WordNet { { cite journal", + "entity_list": [ + { + "name": "WordNet", + "type": "product" + } + ] + }, + { + "sentence": "In red-green anaglyph , the audience was presented three reels of tests , which included rural scenes , test shots of Marie Doro , a segment of John B. Mason playing a number of passages from Jim the Penman ( a film released by Famous Players-Lasky that year , but not in 3D ) , Oriental dancers , and a reel of footage of Niagara Falls .", + "entity_list": [ + { + "name": "red-green anaglyph", + "type": "else" + }, + { + "name": "Marie Doro", + "type": "person" + }, + { + "name": "John B. Mason", + "type": "person" + }, + { + "name": "Jim the Penman", + "type": "person" + }, + { + "name": "Famous Players-Lasky", + "type": "organization" + }, + { + "name": "Niagara Falls", + "type": "location" + } + ] + }, + { + "sentence": "This is a particular way of implementing maximum likelihood estimation for this problem .", + "entity_list": [ + { + "name": "maximum likelihood estimation", + "type": "metrics" + } + ] + }, + { + "sentence": "Crawler-friendly Web Servers , and it integrates the features of sitemaps and RSS feeds into a decentralized mechanism for computational biologists and bio-informaticians to openly broadcast and retrieve meta-data about biomedical resources .", + "entity_list": [ + { + "name": "Crawler-friendly Web Servers", + "type": "product" + }, + { + "name": "RSS", + "type": "else" + } + ] + }, + { + "sentence": "It is covered by American National Standards Institute / NISO standard Z39.50 , and International Organization for Standardization standard 23950 .", + "entity_list": [ + { + "name": "American National Standards Institute / NISO standard Z39.50", + "type": "else" + }, + { + "name": "International Organization for Standardization standard 23950", + "type": "else" + } + ] + }, + { + "sentence": "The encoder and decoder are trained to take a phrase and reproduce the one-hot distribution of a corresponding paraphrase by minimizing perplexity using simple stochastic gradient descent .", + "entity_list": [ + { + "name": "one-hot distribution", + "type": "else" + }, + { + "name": "perplexity", + "type": "metrics" + }, + { + "name": "stochastic gradient descent", + "type": "algorithm" + } + ] + }, + { + "sentence": "Other typical applications of pattern recognition techniques are automatic speech recognition , classification of text into several categories ( e.g. , spam / non-spam email messages ) , the handwriting recognition on postal envelopes , automatic recognition of images of human faces , or handwriting image extraction from medical forms .", + "entity_list": [ + { + "name": "pattern recognition", + "type": "field" + }, + { + "name": "automatic speech recognition", + "type": "task" + }, + { + "name": "classification of text into several categories", + "type": "task" + }, + { + "name": "handwriting recognition on postal envelopes", + "type": "task" + }, + { + "name": "automatic recognition of images of human faces", + "type": "task" + }, + { + "name": "handwriting image extraction from medical forms", + "type": "task" + } + ] + }, + { + "sentence": "Artificial neural networks have been used on a variety of tasks , including computer vision , speech recognition , machine translation , social network filtering , playing board and video games and medical diagnosis .", + "entity_list": [ + { + "name": "Artificial neural networks", + "type": "algorithm" + }, + { + "name": "computer vision", + "type": "field" + }, + { + "name": "speech recognition", + "type": "task" + }, + { + "name": "machine translation", + "type": "task" + }, + { + "name": "social network filtering", + "type": "task" + }, + { + "name": "playing board and video games", + "type": "task" + }, + { + "name": "medical diagnosis", + "type": "task" + } + ] + }, + { + "sentence": "Examples include Salford Systems CART ( which licensed the proprietary code of the original CART authors ) , IBM SPSS Modeler , RapidMiner , SAS Enterprise Miner , Matlab , R ( an open-source software environment for statistical computing , which includes several CART implementations such as rpart , party and randomForest packages ) , Weka ( a free and open-source data-mining suite , contains many decision tree algorithms ) , Orange , KNIME , Microsoft SQL Server programming language ) .", + "entity_list": [ + { + "name": "Salford Systems", + "type": "organization" + }, + { + "name": "CART", + "type": "product" + }, + { + "name": "IBM", + "type": "organization" + }, + { + "name": "SPSS Modeler", + "type": "product" + }, + { + "name": "RapidMiner", + "type": "product" + }, + { + "name": "SAS Enterprise Miner", + "type": "product" + }, + { + "name": "Matlab", + "type": "product" + }, + { + "name": "R", + "type": "program language" + }, + { + "name": "statistical computing", + "type": "field" + }, + { + "name": "rpart", + "type": "algorithm" + }, + { + "name": "party", + "type": "algorithm" + }, + { + "name": "randomForest", + "type": "algorithm" + }, + { + "name": "Weka", + "type": "product" + }, + { + "name": "data-mining", + "type": "task" + }, + { + "name": "decision tree", + "type": "algorithm" + }, + { + "name": "Orange", + "type": "product" + }, + { + "name": "KNIME", + "type": "product" + }, + { + "name": "Microsoft SQL Server", + "type": "product" + } + ] + }, + { + "sentence": "Linear predictive coding ( LPC ) was first developed by Fumitada Itakura of Nagoya University and Shuzo Saito of Nippon Telegraph and Telephone ( NTT ) in 1966 , and then further developed by Bishnu S. Atal and Manfred R. Schroeder at Bell Labs during the early-to-mid-1970s , becoming a basis for the first speech synthesizer DSP chips in the late 1970s .", + "entity_list": [ + { + "name": "Linear predictive coding", + "type": "algorithm" + }, + { + "name": "LPC", + "type": "algorithm" + }, + { + "name": "Fumitada Itakura", + "type": "researcher" + }, + { + "name": "Nagoya University", + "type": "university" + }, + { + "name": "Shuzo Saito", + "type": "researcher" + }, + { + "name": "Nippon Telegraph and Telephone", + "type": "organization" + }, + { + "name": "NTT", + "type": "organization" + }, + { + "name": "Bishnu S. Atal", + "type": "researcher" + }, + { + "name": "Manfred R. Schroeder", + "type": "researcher" + }, + { + "name": "Bell Labs", + "type": "organization" + }, + { + "name": "speech synthesizer DSP chips", + "type": "product" + } + ] + }, + { + "sentence": "An F-score is a combination of the precision and the recall , providing a single score .", + "entity_list": [ + { + "name": "F-score", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "recall", + "type": "metrics" + } + ] + }, + { + "sentence": "Image analysis tasks can be as simple as reading bar code d tags or as sophisticated as facial recognition system .", + "entity_list": [ + { + "name": "Image analysis", + "type": "field" + }, + { + "name": "reading bar code d tags", + "type": "task" + }, + { + "name": "facial recognition system", + "type": "product" + } + ] + }, + { + "sentence": "The special case of linear support-vector machines can be solved more efficiently by the same kind of algorithms to optimize its close cousin , logistic regression ; this class of algorithms includes Stochastic gradient descent ( e.g. , PEGASOS ) .", + "entity_list": [ + { + "name": "support-vector machines", + "type": "algorithm" + }, + { + "name": "logistic regression", + "type": "algorithm" + }, + { + "name": "Stochastic gradient descent", + "type": "algorithm" + }, + { + "name": "PEGASOS", + "type": "algorithm" + } + ] + }, + { + "sentence": "When Siri on an iOS device is asked Do you have a pet ? , one the responses is I used to have an AIBO .", + "entity_list": [ + { + "name": "Siri", + "type": "product" + }, + { + "name": "iOS", + "type": "product" + }, + { + "name": "AIBO", + "type": "product" + } + ] + }, + { + "sentence": "In information retrieval , the positive predictive value is called precision , and sensitivity is called recall .", + "entity_list": [ + { + "name": "information retrieval", + "type": "task" + }, + { + "name": "positive predictive value", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "sensitivity", + "type": "metrics" + }, + { + "name": "recall", + "type": "metrics" + } + ] + }, + { + "sentence": "In particular , his research focused on areas such as text mining ( extraction , categorization , novelty detection ) and in new theoretical frameworks such as a unified utility-based theory bridging information retrieval , Automatic summarization , free-text Question Answering and related tasks .", + "entity_list": [ + { + "name": "text mining", + "type": "field" + }, + { + "name": "extraction", + "type": "task" + }, + { + "name": "categorization", + "type": "task" + }, + { + "name": "novelty detection", + "type": "task" + }, + { + "name": "information retrieval", + "type": "task" + }, + { + "name": "Automatic summarization", + "type": "task" + }, + { + "name": "free-text Question Answering", + "type": "task" + } + ] + }, + { + "sentence": "Delta robot s have base-mounted rotary actuator s that move a light , stiff , parallelogram arm .", + "entity_list": [ + { + "name": "Delta robot", + "type": "product" + }, + { + "name": "rotary actuator", + "type": "product" + }, + { + "name": "parallelogram arm", + "type": "else" + } + ] + }, + { + "sentence": "The four outcomes can be formulated in a 2 × 2 contingency table or confusion matrix , as follows :", + "entity_list": [ + { + "name": "2 × 2 contingency table", + "type": "metrics" + }, + { + "name": "confusion matrix", + "type": "metrics" + } + ] + }, + { + "sentence": "The actual data mining task is the semi-automatic or automatic analysis of large quantities of data to extract unknown , interesting patterns such as groups of data records ( cluster analysis ) , unusual records ( anomaly detection ) , and dependencies ( association rule mining , sequential pattern mining ) .", + "entity_list": [ + { + "name": "data mining", + "type": "field" + }, + { + "name": "cluster analysis", + "type": "task" + }, + { + "name": "anomaly detection", + "type": "task" + }, + { + "name": "association rule mining", + "type": "task" + }, + { + "name": "sequential pattern mining", + "type": "task" + } + ] + }, + { + "sentence": "For a recommender system , sentiment analysis has been proven to be a valuable technique .", + "entity_list": [ + { + "name": "recommender system", + "type": "product" + }, + { + "name": "sentiment analysis", + "type": "task" + } + ] + }, + { + "sentence": "By chance , the Germans had chosen the operating frequency of the Wotan system very badly ; it operated on 45 MHz , which just happened to be the frequency of the powerful-but-dormant BBC television transmitter at Alexandra Palace .", + "entity_list": [ + { + "name": "Germans", + "type": "else" + }, + { + "name": "Wotan", + "type": "product" + }, + { + "name": "BBC", + "type": "organization" + }, + { + "name": "Alexandra Palace", + "type": "location" + } + ] + }, + { + "sentence": "In Semantic Web applications , and in relatively popular applications of RDF like RSS and FOAF ( Friend a Friend ) , resources tend to be represented by URIs that intentionally denote , and can be used to access , actual data on the World Wide Web .", + "entity_list": [ + { + "name": "Semantic Web applications", + "type": "else" + }, + { + "name": "RDF", + "type": "else" + }, + { + "name": "RSS", + "type": "product" + }, + { + "name": "FOAF", + "type": "product" + }, + { + "name": "Friend a Friend", + "type": "product" + }, + { + "name": "URIs", + "type": "else" + }, + { + "name": "World Wide Web", + "type": "product" + } + ] + }, + { + "sentence": "The Association for the Advancement of Artificial Intelligence has studied this topic in depth", + "entity_list": [ + { + "name": "Association for the Advancement of Artificial Intelligence", + "type": "conference" + } + ] + }, + { + "sentence": "Starting as a curiosity , the speech system of Apple Macintosh has evolved into a fully supported program PlainTalk , for people with vision problems .", + "entity_list": [ + { + "name": "speech system of Apple Macintosh", + "type": "product" + }, + { + "name": "PlainTalk", + "type": "product" + } + ] + }, + { + "sentence": "Other areas of usage for ontologies within NLP include information retrieval , information extraction and automatic summarization .", + "entity_list": [ + { + "name": "NLP", + "type": "field" + }, + { + "name": "information retrieval", + "type": "task" + }, + { + "name": "information extraction", + "type": "task" + }, + { + "name": "automatic summarization", + "type": "task" + } + ] + }, + { + "sentence": "The Institute has collaborated closely with the Janelia Farm Campus of Howard Hughes Medical Institute , the Allen Institute for Brain Science and the National Institutes of Health to develop better methods of reconstructing neuronal architectures .", + "entity_list": [ + { + "name": "Janelia Farm Campus of Howard Hughes Medical Institute", + "type": "organization" + }, + { + "name": "Allen Institute for Brain Science", + "type": "organization" + }, + { + "name": "National Institutes of Health", + "type": "organization" + } + ] + }, + { + "sentence": "Recently , Google announced that Google Translate translates roughly enough text to fill 1 million books in one day ( 2012 ) .", + "entity_list": [ + { + "name": "Google", + "type": "organization" + }, + { + "name": "Google Translate", + "type": "product" + } + ] + }, + { + "sentence": "Events are held worldwide , and are most popular in the United Kingdom , United States , Japan , Singapore , India , South Korea and becoming popular in subcontinent countries such as Sri Lanka .", + "entity_list": [ + { + "name": "United Kingdom", + "type": "country" + }, + { + "name": "United States", + "type": "country" + }, + { + "name": "Japan", + "type": "country" + }, + { + "name": "Singapore", + "type": "country" + }, + { + "name": "India", + "type": "country" + }, + { + "name": "South Korea", + "type": "country" + }, + { + "name": "Sri Lanka", + "type": "country" + } + ] + }, + { + "sentence": "These packages are developed primarily in R , and sometimes in Java , C , C + + , and Fortran .", + "entity_list": [ + { + "name": "R", + "type": "program language" + }, + { + "name": "Java", + "type": "program language" + }, + { + "name": "C", + "type": "program language" + }, + { + "name": "C + +", + "type": "program language" + }, + { + "name": "Fortran", + "type": "program language" + } + ] + }, + { + "sentence": "As part of the 2006 European Conference on Computer Vision ( ECCV ) , Dalal and Triggs teamed up with Cordelia Schmid to apply HOG detectors to the problem of human detection in films and videos .", + "entity_list": [ + { + "name": "2006 European Conference on Computer Vision", + "type": "conference" + }, + { + "name": "ECCV", + "type": "conference" + }, + { + "name": "Dalal", + "type": "researcher" + }, + { + "name": "Triggs", + "type": "researcher" + }, + { + "name": "Cordelia Schmid", + "type": "researcher" + }, + { + "name": "HOG detectors", + "type": "algorithm" + }, + { + "name": "human detection in films and videos", + "type": "task" + } + ] + }, + { + "sentence": "In addition to sensitivity and specificity , the performance of a binary classification test can be measured with positive predictive value ( PPV ) , also known as precision , and negative predictive value ( NPV ) .", + "entity_list": [ + { + "name": "sensitivity", + "type": "metrics" + }, + { + "name": "specificity", + "type": "metrics" + }, + { + "name": "binary classification", + "type": "task" + }, + { + "name": "positive predictive value", + "type": "metrics" + }, + { + "name": "PPV", + "type": "metrics" + }, + { + "name": "precision", + "type": "metrics" + }, + { + "name": "negative predictive value", + "type": "metrics" + }, + { + "name": "NPV", + "type": "metrics" + } + ] + }, + { + "sentence": "Such models may given partial credit for overlapping matches ( such as using the Jaccard index criterion .", + "entity_list": [ + { + "name": "Jaccard index criterion", + "type": "metrics" + } + ] + }, + { + "sentence": "Further , in the case of estimation based on a single sample , it demonstrates philosophical issues and possible misunderstandings in the use of maximum likelihood estimators and likelihood functions .", + "entity_list": [ + { + "name": "maximum likelihood estimators and likelihood functions", + "type": "metrics" + } + ] + } +] \ No newline at end of file