@comment{
  Journal article from a repository export, normalised to standard fields:
  venue, volume, issue and pages are split out of the former free-text
  description field, and the DOI is stored bare in the doi field.
  The citation key is kept byte-identical so existing citations still resolve;
  because it contains non-ASCII characters, a UTF-8-aware backend (for example
  Biber) is needed. The rights, language, type and keywords fields are carried
  over from the export unchanged.
}
@article{Pełka_Marcin_Isolation_2024,
  author    = {Pełka, Marcin and Dudek, Andrzej},
  title     = {Isolation Forests for Symbolic Data as a Tool for Outlier Mining},
  journal   = {Econometrics = Ekonometria},
  volume    = {28},
  number    = {1},
  pages     = {1--10},
  year      = {2024},
  publisher = {Publishing House of Wroclaw University of Economics and Business},
  doi       = {10.15611/eada.2024.1.01},
  language  = {eng},
  rights    = {Pewne prawa zastrzeżone na rzecz Autorów i Wydawcy},
  type      = {artykuł},
  keywords  = {symbolic data analysis, isolation forest, outliers, analiza danych symbolicznych, lasy separujące, obserwacje odstające},
  abstract  = {Aim: Outlier detection is a key part of every data analysis. Although there are many definitions of outliers that can be found in the literature, all of them emphasise that outliers are objects that are in some way different from other objects in the dataset. There are many different approaches that have been proposed, compared, and analysed for the case of classical data. However, there are only few studies that deal with the problem of outlier detection in symbolic data analysis. The paper aimed to propose how to adapt isolation forest for symbolic data cases. Methodology: An isolation forest for symbolic data is used to detect outliers in four different artificial datasets with a known cluster structure and a known number of outliers. Results: The results show that the isolation forest for symbolic data is a fast and efficient tool for outlier mining. Implications and recommendations: As the isolation forest for symbolic data appears to be an efficient tool for outlier detection for artificial data, further studies should focus on real data sets that contain outliers (i.e. credit card fraud dataset), and this approach should be compared with other outlier mining tools (i.e. DBSCAN). The authors recommend using the same initial settings for the isolation forest for symbolic data as the settings that are proposed for the isolation forest for classical data. Originality/value: This paper is the first of its kind, focusing not only on the problem of outlier detection in general, but also extending the well-known isolation forest model for symbolic data cases.},
}