@inproceedings{8516495,
title = {Latent Feature Combination for Multi-Context Music Recommendation},
author = {Martin Pichl and Eva Zangerle},
url = {https://www.evazangerle.at/wp-content/uploads/2018/12/cbmi18.pdf},
doi = {10.1109/CBMI.2018.8516495},
year = {2018},
date = {2018-09-01},
booktitle = {2018 International Conference on Content-Based Multimedia Indexing (CBMI)},
pages = {1--6},
abstract = {In recent years, music aficionados have increasingly been consuming music via public music streaming platforms. Due to the size of the collections provided, music recommender systems have become a vital component as these aim to provide recommendations that match the user's current context as, throughout the day, users listen to music in numerous different contexts and situations. In this paper, we propose a multi-context-aware track recommender system that jointly exploits information about the current situation and musical preferences of users. To jointly model users by their situational and musical preferences, we cluster users based on their situational features and similarly, cluster music tracks based on their content features. Our experiments show that by relying on Factorization Machines for the computation of recommendations, the proposed approach allows to successfully leverage interaction effects between listening histories, situational and track content information, substantially outperforming a set of baseline recommenders.},
note = {best student paper award},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

In recent years, music aficionados have increasingly been consuming music via public music streaming platforms. Due to the size of the collections provided, music recommender systems have become a vital component as these aim to provide recommendations that match the user's current context as, throughout the day, users listen to music in numerous different contexts and situations. In this paper, we propose a multi-context-aware track recommender system that jointly exploits information about the current situation and musical preferences of users. To jointly model users by their situational and musical preferences, we cluster users based on their situational features and similarly, cluster music tracks based on their content features. Our experiments show that by relying on Factorization Machines for the computation of recommendations, the proposed approach allows to successfully leverage interaction effects between listening histories, situational and track content information, substantially outperforming a set of baseline recommenders.

We present the Height Optimized Trie (HOT), a fast and space-efficient in-memory index structure. The core algorithmic idea of HOT is to dynamically vary the number of bits considered at each node, which enables a consistently high fanout and thereby good cache efficiency. The layout of each node is carefully engineered for compactness and fast search using SIMD instructions. Our experimental results, which use a wide variety of workloads and data sets, show that HOT outperforms other state-of-the-art index structures for string keys both in terms of search performance and memory footprint, while being competitive for integer keys. We believe that these properties make HOT highly useful as a general-purpose index structure for main-memory databases.

@incollection{wikiworkshop18,
title = {Recommendation-Assisted Data Curation for {Wikidata}},
author = {Eva Zangerle and Claudia M{\"u}ller-Birn},
url = {https://www.evazangerle.at/wp-content/uploads/2018/06/wiki-workshop-2018.pdf},
url_poster = {https://www.evazangerle.at/wp-content/uploads/2018/04/www_poster.pdf},
doi = {10.5281/zenodo.1194790},
year = {2018},
date = {2018-04-24},
booktitle = {Wiki Workshop 2018, co-located with The Web Conference},
abstract = {The Wikidata project provides a structured knowledge base that is curated by bots and humans alike. The quality and completeness of data contained is naturally influenced by the users who enter and maintain the data in the form of items described by statements on the platform. Users who are new to the Wikidata environment and its underlying data model, but are, nonetheless, experts in their fields, are confronted with a steep learning curve when aiming to enter information on Wikidata (e.g. regarding the choice of suitable properties for creating statements). In this work, we propose a recommendation-based annotation platform where users who currently work with or on a text are supported in finding suitable Wikidata entities for data extracted from the underlying text source to ultimately feed this structured information to the Wikidata platform. Such recommendations not only support users in annotating their data according to Wikidata's terminological knowledge, but also expand the number of references on the Wikidata platform that reveal the origin of existing statements.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}

The Wikidata project provides a structured knowledge base that is curated by bots and humans alike. The quality and completeness of data contained is naturally influenced by the users who enter and maintain the data in the form of items described by statements on the platform. Users who are new to the Wikidata environment and its underlying data model, but are, nonetheless, experts in their fields, are confronted with a steep learning curve when aiming to enter information on Wikidata (e.g. regarding the choice of suitable properties for creating statements). In this work, we propose a recommendation-based annotation platform where users who currently work with or on a text are supported in finding suitable Wikidata entities for data extracted from the underlying text source to ultimately feed this structured information to the Wikidata platform. Such recommendations not only support users in annotating their data according to Wikidata's terminological knowledge, but also expand the number of references on the Wikidata platform that reveal the origin of existing statements.

@inproceedings{milc2018,
title = {{geMsearch}: Personalized Explorative Music Search},
author = {Christian Esswein and Markus Schedl and Eva Zangerle},
url = {http://ceur-ws.org/Vol-2068/milc2.pdf},
year = {2018},
date = {2018-03-11},
booktitle = {Joint Proceedings of the ACM IUI 2018 Workshops co-located with the 23rd ACM Conference on Intelligent User Interfaces (ACM IUI 2018)},
publisher = {CEUR-WS.org},
abstract = {Due to the rise of music streaming platforms, huge collections of music are now available to users on various devices. Within these collections, users aim to find and explore songs based on certain criteria reflecting their current and context-specific preferences. Currently, users are limited to either using search facilities or relying on recommender systems that suggest suitable tracks or artists. Using search facilities requires the user to have some idea about the targeted music and to formulate a query that accurately describes this music, whereas recommender systems are traditionally geared towards long-term shifts of user preferences in contrast to ad-hoc and interactive preference elicitation. To bridge this gap, we propose geMsearch, an approach for personalized, explorative music search based on graph embedding techniques. As the ecosystem of a music collection can be represented as a heterogeneous graph containing nodes describing e.g., tracks, artists, genres or users, we employ graph embedding techniques to learn low-dimensional vector representations for all nodes within the graph. This allows for efficient approximate querying of the collection and, more importantly, for employing visualization strategies that allow the user to explore the music collection in a 3D-space.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Due to the rise of music streaming platforms, huge collections of music are now available to users on various devices. Within these collections, users aim to find and explore songs based on certain criteria reflecting their current and context-specific preferences. Currently, users are limited to either using search facilities or relying on recommender systems that suggest suitable tracks or artists. Using search facilities requires the user to have some idea about the targeted music and to formulate a query that accurately describes this music, whereas recommender systems are traditionally geared towards long-term shifts of user preferences in contrast to ad-hoc and interactive preference elicitation. To bridge this gap, we propose geMsearch, an approach for personalized, explorative music search based on graph embedding techniques. As the ecosystem of a music collection can be represented as a heterogeneous graph containing nodes describing e.g., tracks, artists, genres or users, we employ graph embedding techniques to learn low-dimensional vector representations for all nodes within the graph. This allows for efficient approximate querying of the collection and, more importantly, for employing visualization strategies that allow the user to explore the music collection in a 3D-space.

@inproceedings{DBLP:conf/ecir/zangerle17,
title = {{ALF-200k}: Towards Extensive Multimodal Analyses of Music Tracks and Playlists},
author = {Eva Zangerle and Michael Tschuggnall and Stefan Wurzinger and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2018/04/ecir-2018-alf.pdf},
doi = {10.1007/978-3-319-76941-7_48},
isbn = {978-3-319-76941-7},
year = {2018},
date = {2018-01-01},
booktitle = {Advances in Information Retrieval - 40th European Conference on IR Research, ECIR 2018},
pages = {584--590},
publisher = {Springer},
address = {Cham},
abstract = {In recent years, approaches in music information retrieval have been based on multimodal analyses of music incorporating audio as well as lyrics features. Because most of those approaches are lacking reusable, high-quality datasets, in this work we propose ALF-200k, a publicly available, novel dataset including 176 audio and lyrics features of more than 200,000 tracks and their attribution to more than 11,000 user-created playlists. While the dataset is of general purpose and thus, may be used in experiments for diverse music information retrieval problems, we present a first multimodal study on playlist features and particularly analyze, which type of features are shared within specific playlists and thus, characterize it. We show that while acoustic features act as the major glue between tracks contained in a playlists, also lyrics features are a powerful means to attribute tracks to playlists.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

In recent years, approaches in music information retrieval have been based on multimodal analyses of music incorporating audio as well as lyrics features. Because most of those approaches are lacking reusable, high-quality datasets, in this work we propose ALF-200k, a publicly available, novel dataset including 176 audio and lyrics features of more than 200,000 tracks and their attribution to more than 11,000 user-created playlists. While the dataset is general-purpose and thus may be used in experiments for diverse music information retrieval problems, we present a first multimodal study on playlist features and, in particular, analyze which types of features are shared within specific playlists and thus characterize them. We show that while acoustic features act as the major glue between tracks contained in a playlist, lyrics features are also a powerful means to attribute tracks to playlists.

@online{wikidatacon17,
title = {Employing {Wikidata} for Fostering Scholarly Research},
author = {Adelheid Heftberger and Jakob H{\"o}per and Claudia M{\"u}ller-Birn and Niels-Oliver Walkowski and Eva Zangerle},
url = {https://www.wikidata.org/wiki/Wikidata:WikidataCon_2017/Submissions/Employing_Wikidata_for_Fostering_Scholarly_Research},
year = {2017},
date = {2017-10-26},
organization = {WikiDataCon 2017, Berlin},
abstract = {Wikidata follows a formal and data-oriented approach to the representation of information and assertions which contain such information. This is challenging especially for non-technical experts from the humanities, where the discursive function of text is a constitutive feature of information. These disciplines not only publish but also work on the basis of texts. Consequently, the usage of Wikidata in these disciplines needs mediation and support, to bridge the different thought systems and allow scholar to integrate existing research results into Wikidata easily.

Building on a use-case we will present (a demo which shows) how a workflow for such a mediation as well as the underlying tool chain can look like. The use-case is embedded in the Film and Media Studies Open Access journal Apparatus. We show an annotation tool (neonion) which integrates with the articles of the Apparatus journals and which allows to create triples in a non-technical way by annotating relevant parts of the text. The tool is tightly coupled both with the recommender software Snoopy and Wikidata. Snoopy supports the annotation process by recommending suitable and missing properties from Wikidata for specific items. We aim to automatically ingest the annotations into Wikidata (at the moment, test.wikidata.org is used) without further effort by the creator. We believe that both workflow and toolchain are transferable to other use-cases.

The presented work shows how Wikidata's knowledge base can benefit from scholarly research results and how scholars can contribute to Wikidata without the need to understand its logic completely. Furthermore, it demonstrates how the consistent use of properties in Wikidata can be supported and how the creation of data into Wikidata can be tied more closely with research contexts.},
keywords = {},
pubstate = {published},
tppubtype = {online}
}

Wikidata follows a formal and data-oriented approach to the representation of information and assertions which contain such information. This is challenging especially for non-technical experts from the humanities, where the discursive function of text is a constitutive feature of information. These disciplines not only publish but also work on the basis of texts. Consequently, the usage of Wikidata in these disciplines needs mediation and support, to bridge the different thought systems and allow scholars to integrate existing research results into Wikidata easily.

Building on a use-case, we will present (a demo which shows) what a workflow for such a mediation as well as the underlying tool chain can look like. The use-case is embedded in the Film and Media Studies Open Access journal Apparatus. We show an annotation tool (neonion) which integrates with the articles of the Apparatus journals and which allows users to create triples in a non-technical way by annotating relevant parts of the text. The tool is tightly coupled both with the recommender software Snoopy and Wikidata. Snoopy supports the annotation process by recommending suitable and missing properties from Wikidata for specific items. We aim to automatically ingest the annotations into Wikidata (at the moment, test.wikidata.org is used) without further effort by the creator. We believe that both workflow and toolchain are transferable to other use-cases.

The presented work shows how Wikidata’s knowledge base can benefit from scholarly research results and how scholars can contribute to Wikidata without the need to understand its logic completely. Furthermore, it demonstrates how the consistent use of properties in Wikidata can be supported and how the creation of data into Wikidata can be tied more closely with research contexts.

This paper summarizes our contribution (team DBIS) to the AcousticBrainz Genre Task: Content-based music genre recognition from multiple sources as part of MediaEval 2017. We utilize a hierarchical set of multilabel classifiers to predict genres and subgenres and rely on a voting scheme to predict labels across datasets.

@inproceedings{icmr2017,
title = {Improving Context-Aware Music Recommender Systems: Beyond the Pre-filtering Approach},
author = {Martin Pichl and Eva Zangerle and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/acm-icmr-2017.pdf},
doi = {10.1145/3078971.3078980},
year = {2017},
date = {2017-06-07},
booktitle = {Proceedings of the 2017 ACM on International Conference on Multimedia Retrieval, ICMR 2017, Bucharest, Romania},
pages = {201--208},
publisher = {ACM},
abstract = {Over the last years, music consumption has changed fundamentally: people switch from private, mostly limited music collections to huge public music collections provided by music streaming platforms. Thus, the amount of available music has increased dramatically and music streaming platforms heavily rely on recommender systems to assist users in discovering music they like. Incorporating the context of users has been shown to improve the quality of recommendations. Previous approaches based on pre-filtering suffered from a splitted dataset. In this work, we present a context-aware recommender system based on factorization machines that extracts information about the user's context from the names of the user's playlists. Based on a dataset comprising 15,000 users and 1.8 million tracks we show that our proposed approach outperforms the pre-filtering approach substantially in terms of accuracy of the computed recommendations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Over the last few years, music consumption has changed fundamentally: people switch from private, mostly limited music collections to huge public music collections provided by music streaming platforms. Thus, the amount of available music has increased dramatically and music streaming platforms heavily rely on recommender systems to assist users in discovering music they like. Incorporating the context of users has been shown to improve the quality of recommendations. Previous approaches based on pre-filtering suffered from a split dataset. In this work, we present a context-aware recommender system based on factorization machines that extracts information about the user's context from the names of the user's playlists. Based on a dataset comprising 15,000 users and 1.8 million tracks, we show that our proposed approach outperforms the pre-filtering approach substantially in terms of accuracy of the computed recommendations.

@article{ijmdem17,
title = {Understanding User-curated Playlists on {Spotify}: A Machine Learning Approach},
author = {Martin Pichl and Eva Zangerle and G{\"u}nther Specht},
doi = {10.4018/IJMDEM.2017100103},
issn = {1947-8534},
year = {2017},
date = {2017-03-01},
journal = {International Journal of Multimedia Data Engineering and Management (IJMDEM)},
volume = {8},
number = {4},
pages = {44--59},
abstract = {Music streaming platforms enable people to access millions of tracks using computers and mobile devices. However, users cannot browse manually millions of tracks to find music they like. Building recommender systems suggesting music fitting the current context of a user is a challenging task. A deeper understanding for the characteristics of user-curated playlists naturally contributes to more personalized recommendations. To get a deeper understanding of how users organize music nowadays, we analyze user-curated playlists from the music streaming platform Spotify. Based on the audio features of the tracks, we find an explanation of differences in the playlists using a PCA and are able to group playlists using spectral clustering. Our findings about playlist characteristics can be exploited in a SVD-based music recommender system and our proposed clustering approach for finding groups of similar playlists is easy to integrate into a recommender system using pre- or post-filtering techniques.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Music streaming platforms enable people to access millions of tracks using computers and mobile devices. However, users cannot manually browse millions of tracks to find music they like. Building recommender systems suggesting music fitting the current context of a user is a challenging task. A deeper understanding of the characteristics of user-curated playlists naturally contributes to more personalized recommendations. To get a deeper understanding of how users organize music nowadays, we analyze user-curated playlists from the music streaming platform Spotify. Based on the audio features of the tracks, we find an explanation of differences in the playlists using a PCA and are able to group playlists using spectral clustering. Our findings about playlist characteristics can be exploited in an SVD-based music recommender system and our proposed clustering approach for finding groups of similar playlists is easy to integrate into a recommender system using pre- or post-filtering techniques.

@inproceedings{hicss17,
title = {A Peer-Based Approach on Analyzing Hacked {Twitter} Accounts},
author = {Benjamin Murauer and Eva Zangerle and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/hicss17.pdf},
url_handle = {https://hdl.handle.net/10125/41378},
year = {2017},
date = {2017-01-01},
booktitle = {Proceedings of the 50th Hawaii International Conference on System Sciences, HICSS 2017, Big Island, Hawaii, USA, January 4-7, 2017},
pages = {1841--1850},
publisher = {IEEE},
abstract = {Social media has become an important part of the lives of their hundreds of millions of users. Hackers make use of the large target audience by sending malicious content, often by hijacking existing accounts. This phenomenon has caused widespread research on how to detect hacked accounts, where different approaches exist. This work sets out to analyze the possibilities of including the reactions of hacked Twitter accounts' peers into a detection system. Based on a dataset of six million tweets crawled from Twitter over the course of two years, we select a subset of tweets in which users react to alleged hacks of other accounts. We then gather and analyze the responses to those messages to reconstruct the conversations made. A quantitative analysis of these conversations shows that 30\% of the users that are allegedly being hacked reply to the accusations, suggesting that these users acknowledge that their account was hacked.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Social media has become an important part of the lives of their hundreds of millions of users. Hackers make use of the large target audience by sending malicious content, often by hijacking existing accounts. This phenomenon has caused widespread research on how to detect hacked accounts, where different approaches exist. This work sets out to analyze the possibilities of including the reactions of hacked Twitter accounts’ peers into a detection system. Based on a dataset of six million tweets crawled from Twitter over the course of two years, we select a subset of tweets in which users react to alleged hacks of other accounts. We then gather and analyze the responses to those messages to reconstruct the conversations made. A quantitative analysis of these conversations shows that 30% of the users that are allegedly being hacked reply to the accusations, suggesting that these users acknowledge that their account was hacked.

@inproceedings{opensym16,
title = {An Empirical Evaluation of Property Recommender Systems for {Wikidata} and Collaborative Knowledge Bases},
author = {Eva Zangerle and Wolfgang Gassler and Stefan Steinhauser and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/opensym16.pdf},
doi = {10.1145/2957792.2957804},
year = {2016},
date = {2016-01-01},
booktitle = {Proceedings of the 12th International Symposium on Open Collaboration, Berlin, Germany},
publisher = {ACM},
series = {OpenSym '16},
abstract = {The Wikidata platform is a crowdsourced, structured knowledgebase aiming to provide integrated, free and language-agnostic facts which are---amongst others---used by Wikipedias. Users who actively enter, review and revise data on Wikidata are assisted by a property suggesting system which provides users with properties that might also be applicable to a given item. We argue that evaluating and subsequently improving this recommendation mechanism and hence, assisting users, can directly contribute to an even more integrated, consistent and extensive knowledge base serving a huge variety of applications. However, the quality and usefulness of such recommendations has not been evaluated yet. In this work, we provide the first evaluation of different approaches aiming to provide users with property recommendations in the process of curating information on Wikidata. We compare the approach currently facilitated on Wikidata with two state-of-the-art recommendation approaches stemming from the field of RDF recommender systems and collaborative information systems. Further, we also evaluate hybrid recommender systems combining these approaches. Our evaluations show that the current recommendation algorithm works well in regards to recall and precision, reaching a recall@7 of 79.71\% and a precision@7 of 27.97\%. We also find that generally, incorporating contextual as well as classifying information into the computation of property recommendations can further improve its performance significantly.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

The Wikidata platform is a crowdsourced, structured knowledgebase aiming to provide integrated, free and language-agnostic facts which are---amongst others---used by Wikipedias. Users who actively enter, review and revise data on Wikidata are assisted by a property suggesting system which provides users with properties that might also be applicable to a given item. We argue that evaluating and subsequently improving this recommendation mechanism and hence, assisting users, can directly contribute to an even more integrated, consistent and extensive knowledge base serving a huge variety of applications. However, the quality and usefulness of such recommendations has not been evaluated yet. In this work, we provide the first evaluation of different approaches aiming to provide users with property recommendations in the process of curating information on Wikidata. We compare the approach currently facilitated on Wikidata with two state-of-the-art recommendation approaches stemming from the field of RDF recommender systems and collaborative information systems. Further, we also evaluate hybrid recommender systems combining these approaches. Our evaluations show that the current recommendation algorithm works well in regards to recall and precision, reaching a recall@7 of 79.71% and a precision@7 of 27.97%. We also find that generally, incorporating contextual as well as classifying information into the computation of property recommendations can further improve its performance significantly.

@inproceedings{ismir16,
title = {Can Microblogs Predict Music Charts? {An} Analysis of the Relationship between \#{Nowplaying} Tweets and Music Charts},
author = {Eva Zangerle and Martin Pichl and Benedikt Hupfauf and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/ismir16.pdf},
url_proceedings = {https://wp.nyu.edu/ismir2016/event/proceedings/},
year = {2016},
date = {2016-01-01},
booktitle = {Proceedings of the 17th International Society for Music Information Retrieval Conference 2016 (ISMIR 2016)},
publisher = {ISMIR},
abstract = {Twitter is one of the leading social media platforms, where hundreds of millions of tweets cover a wide range of topics, including the music a user is listening to. Such \#nowplaying tweets may serve as an indicator for future charts, however, this has not been thoroughly studied yet. Therefore, we investigate to which extent such tweets correlate with the Billboard Hot 100 charts and whether they allow for music charts prediction. The analysis is based on \#nowplaying tweets and the Billboard charts of the years 2014 and 2015. We analyze three different aspects in regards to the time series representing \#nowplaying tweets and the Billboard charts: (i) the correlation of Twitter and the Billboard charts, (ii) the temporal relation between those two and (iii) the prediction performance in regards to charts positions of tracks. We find that while there is a mild correlation between tweets and the charts, there is a temporal lag between these two time series for 90\% of all tracks. As for the predictive power of Twitter, we find that incorporating Twitter information in a multivariate model results in a significant decrease of both the mean RMSE as well as the variance of rank predictions.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Twitter is one of the leading social media platforms, where hundreds of millions of tweets cover a wide range of topics, including the music a user is listening to. Such #nowplaying tweets may serve as an indicator for future charts, however, this has not been thoroughly studied yet. Therefore, we investigate to which extent such tweets correlate with the Billboard Hot 100 charts and whether they allow for music charts prediction. The analysis is based on #nowplaying tweets and the Billboard charts of the years 2014 and 2015. We analyze three different aspects in regards to the time series representing #nowplaying tweets and the Billboard charts: (i) the correlation of Twitter and the Billboard charts, (ii) the temporal relation between those two and (iii) the prediction performance in regards to charts positions of tracks. We find that while there is a mild correlation between tweets and the charts, there is a temporal lag between these two time series for 90% of all tracks. As for the predictive power of Twitter, we find that incorporating Twitter information in a multivariate model results in a significant decrease of both the mean RMSE as well as the variance of rank predictions.

@inproceedings{hicss16,
title = {Analysing the Usage of {Wikipedia} on {Twitter}: Understanding Inter-Language Links},
author = {Eva Zangerle and Georg Schmidhammer and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/hicss16.pdf},
doi = {10.1109/HICSS.2016.243},
year = {2016},
date = {2016-01-01},
booktitle = {49th Hawaii International Conference on System Sciences, HICSS 2016, Kauai, Hawaii, USA, January 5-8, 2016},
pages = {1920--1929},
publisher = {IEEE},
abstract = {Wikipedia is a central source of information as 450 million people consult the online encyclopaedia every month to satisfy their information needs. Some of these users also refer to Wikipedia within their tweets. In this paper, we analyse links within tweets referring to a Wikipedia of a language different from the tweet's language. Therefore, we investigate causes for the usage of such inter-language links by comparing the tweeted article and its counterpart in the tweet's language (if there is any) in terms of article quality. We find that the main cause for inter-language links is the non-existence of the article in the tweet's language. Furthermore, we observe that the quality of the tweeted articles is constantly higher in comparison to their counterparts, suggesting that users choose the article of higher quality even when tweeting in another language. Moreover, we find that English is the most dominant target for inter-language links.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Wikipedia is a central source of information as 450 million people consult the online encyclopaedia every month to satisfy their information needs. Some of these users also refer to Wikipedia within their tweets. In this paper, we analyse links within tweets referring to a Wikipedia of a language different from the tweet's language. Therefore, we investigate causes for the usage of such inter-language links by comparing the tweeted article and its counterpart in the tweet's language (if there is any) in terms of article quality. We find that the main cause for inter-language links is the non-existence of the article in the tweet's language. Furthermore, we observe that the quality of the tweeted articles is constantly higher in comparison to their counterparts, suggesting that users choose the article of higher quality even when tweeting in another language. Moreover, we find that English is the most dominant target for inter-language links.

@comment{ism16: paper at IEEE ISM 2016. The date field is set to the first conference day named in booktitle (December 11-13, 2016); the percent sign in the abstract is escaped so the field can be typeset.}
@inproceedings{ism16,
title = {Understanding Playlist Creation on Music Streaming Platforms},
author = {Pichl, Martin and Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/ISM2016.pdf},
doi = {10.1109/ISM.2016.0107},
isbn = {978-1-5090-4571-6},
year = {2016},
date = {2016-12-11},
booktitle = {IEEE International Symposium on Multimedia, ISM 2016, San Jose, CA, USA, December 11-13, 2016},
pages = {475--480},
publisher = {IEEE Computer Society},
abstract = {Music streaming platforms enable people to access millions of tracks using computers and mobile devices. The latter allow users consume different music during different activities. Both, the sheer amount of music and the mobile access to music makes music organization an interesting topic for multimedia researchers. Assisting users to organize their music and make the music they like easily available in the right moment, contributes to increased usability of music streaming platforms. To get a deeper understanding of how users organize music nowadays, we analyze user-created playlists crawled from the music streaming platform Spotify. Using this new data set we find an explanation of differences in the playlists using audio features and based on this compute playlist clusters. We find that 91\% of all users create at least one playlist in the “feel good music”-cluster and classical music or rap music can be considered as niche music with respect to the number of playlists, however not as niche music when considering the number of users. To foster research in this field, we make our analysis tool publicly available.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Music streaming platforms enable people to access millions of tracks using computers and mobile devices. The latter allow users consume different music during different activities. Both, the sheer amount of music and the mobile access to music makes music organization an interesting topic for multimedia researchers. Assisting users to organize their music and make the music they like easily available in the right moment, contributes to increased usability of music streaming platforms. To get a deeper understanding of how users organize music nowadays, we analyze user-created playlists crawled from the music streaming platform Spotify. Using this new data set we find an explanation of differences in the playlists using audio features and based on this compute playlist clusters. We find that 91% of all users create at least one playlist in the “feel good music”-cluster and classical music or rap music can be considered as niche music with respect to the number of playlists, however not as niche music when considering the number of users. To foster research in this field, we make our analysis tool publicly available.

@comment{cmc16: paper at CMC-Corpora 2016. The url field holds the PDF only; the HTML version is kept in the custom urlalt field, which standard styles ignore. Proper nouns in the title are brace-protected against sentence-casing.}
@inproceedings{cmc16,
title = {Geolocating {German} on {Twitter}: Hitches and Glitches of Building and Exploring a {Twitter} Corpus},
author = {Larl, Bettina and Zangerle, Eva},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/cmc16.pdf},
urlalt = {https://nl.ijs.si/janes/wp-content/uploads/2016/09/Geolocating-German-on-Twitter.html},
year = {2016},
date = {2016-01-01},
booktitle = {4th Conference on CMC and Social Media Corpora for the Humanities (CMC-Corpora2016; Sep 2016, Ljubljana, Slovenia)},
abstract = {Languages, and thus Linguistics, have always been influenced by technological developments and new media forms and every development brought new methods and approaches of how language can or should be studied and explored. About 16\% of the EU residents speak German as a native language and this makes it the widest spread language within the European Union. German is a pluricentric language with three standard varieties: German Standard German, Swiss Standard German and Austrian Standard German. The official borders between Germany, Austria and Switzerland also form the boundary between the three standards. Because of easy access and informal communication methods, more and more oral markers find their way into written language. This is often showcased on social media platforms such as Twitter. Every tweet includes language output in the form of short messages that can contain different regional characteristics. Tweets can be geolocated, which means these language outputs can be assigned to the geographic location they were tweeted from. To explore research questions like “Is there a connection between the language output and the geographic location tweets were sent from?” and “Could, for example, lexical varieties be allocated to a specific region by geolocation information provided in tweets?” We are building a Twitter Corpus. The Corpus contains tweets collected via the Twitter streaming API, using a binding box around the rough approximation of the Deutscher Sprachraum and re-filtering the results for Tweets sent within Germany, Austria, Switzerland and South Tyrol/Italy. This paper shows preliminary findings of hand sampling a random sample of 1,000,000 Tweets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Languages, and thus Linguistics, have always been influenced by technological developments and new media forms and every development brought new methods and approaches of how language can or should be studied and explored. About 16% of the EU residents speak German as a native language and this makes it the widest spread language within the European Union. German is a pluricentric language with three standard varieties: German Standard German, Swiss Standard German and Austrian Standard German. The official borders between Germany, Austria and Switzerland also form the boundary between the three standards. Because of easy access and informal communication methods, more and more oral markers find their way into written language. This is often showcased on social media platforms such as Twitter. Every tweet includes language output in the form of short messages that can contain different regional characteristics. Tweets can be geolocated, which means these language outputs can be assigned to the geographic location they were tweeted from. To explore research questions like “Is there a connection between the language output and the geographic location tweets were sent from?” and “Could, for example, lexical varieties be allocated to a specific region by geolocation information provided in tweets?” We are building a Twitter Corpus. The Corpus contains tweets collected via the Twitter streaming API, using a binding box around the rough approximation of the Deutscher Sprachraum and re-filtering the results for Tweets sent within Germany, Austria, Switzerland and South Tyrol/Italy. This paper shows preliminary findings of hand sampling a random sample of 1,000,000 Tweets.

@comment{somera15: paper in the IEEE ICDM 2015 workshops proceedings. The percent sign in the abstract is escaped so the field can be typeset; the umlaut is written as a BibTeX special character.}
@inproceedings{somera15,
title = {Towards a Context-Aware Music Recommendation Approach: What is Hidden in the Playlist Name?},
author = {Pichl, Martin and Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/somera15.pdf},
doi = {10.1109/ICDMW.2015.145},
year = {2015},
date = {2015-01-01},
booktitle = {15th IEEE International Conference on Data Mining Workshops (ICDM 2015)},
pages = {1360--1365},
publisher = {IEEE},
address = {Atlantic City},
series = {ICDM 15},
abstract = {New distribution channels like music streaming platforms paved way for making more and more diverse music available to users. Thus, music recommender systems got in the focus of research in academia as well as industry. Collaborative filtering-based recommender systems have been proven useful, but there is space left for improvements by adapting this general approach to better fit to the music recommendations problem. In this work, we incorporate context-based information about the music consumption into the recommendation process. This information is extracted from playlist names, which are analyzed and aggregated into so-called "contextual clusters". We find that the listening context plays an important role and thus allows for providing recommendations reaching precision values 33\% higher than traditional approaches. Hence, the main contribution of this paper is a new method that extracts and integrates contextual information from playlist names into the recommendation process for improving music recommendations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

New distribution channels like music streaming platforms paved way for making more and more diverse music available to users. Thus, music recommender systems got in the focus of research in academia as well as industry. Collaborative filtering-based recommender systems have been proven useful, but there is space left for improvements by adapting this general approach to better fit to the music recommendations problem. In this work, we incorporate context-based information about the music consumption into the recommendation process. This information is extracted from playlist names, which are analyzed and aggregated into so-called "contextual clusters". We find that the listening context plays an important role and thus allows for providing recommendations reaching precision values 33% higher than traditional approaches. Hence, the main contribution of this paper is a new method that extracts and integrates contextual information from playlist names into the recommendation process for improving music recommendations.

@comment{opensym15: paper at OpenSym 2015. The hash sign in the title and the percent signs in the abstract are escaped because both are LaTeX special characters; proper nouns are brace-protected against sentence-casing.}
@inproceedings{opensym15,
title = {\#{Wikipedia} on {Twitter}: Analyzing Tweets About {Wikipedia}},
author = {Zangerle, Eva and Schmidhammer, Georg and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/opensym15.pdf},
doi = {10.1145/2788993.2789845},
isbn = {978-1-4503-3666-6},
year = {2015},
date = {2015-01-01},
booktitle = {Proceedings of the 11th International Symposium on Open Collaboration},
pages = {14:1--14:8},
publisher = {ACM},
address = {San Francisco, California},
series = {OpenSym '15},
abstract = {Wikipedia has long become a standard source of information on the web and as such is widely referenced on the web and in social media. This paper analyzes the usage of Wikipedia on Twitter by looking into languages used on both platforms, content features of posted articles and recent edits of those articles. The analysis is based on a set of four million tweets and links these tweets to Wikipedia articles and their features to identify interesting relations. We find that within English and Japanese tweets containing a link to Wikipedia, 97\% of the links lead to the English resp. Japanese Wikipedia, whereas for other languages 20\% of the tweets contain a link to a Wikipedia of a different language. Our results also indicate that the number of tweets about a certain topic is not correlated to the number of recent edits on the particular page at the time of sending the tweet.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Wikipedia has long become a standard source of information on the web and as such is widely referenced on the web and in social media. This paper analyzes the usage of Wikipedia on Twitter by looking into languages used on both platforms, content features of posted articles and recent edits of those articles. The analysis is based on a set of four million tweets and links these tweets to Wikipedia articles and their features to identify interesting relations. We find that within English and Japanese tweets containing a link to Wikipedia, 97% of the links lead to the English resp. Japanese Wikipedia, whereas for other languages 20% of the tweets contain a link to a Wikipedia of a different language. Our results also indicate that the number of tweets about a certain topic is not correlated to the number of recent edits on the particular page at the time of sending the tweet.

@comment{sowemine15: paper in the ICWE 2015 workshops volume. Hash signs in the title are escaped because they are LaTeX special characters; proper nouns are brace-protected against sentence-casing.}
@inproceedings{sowemine15,
title = {\#nowplaying on \#{Spotify}: Leveraging {Spotify} Information on {Twitter} for Artist Recommendations},
author = {Pichl, Martin and Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2018/08/SoWeMine_2015_Pichl.pdf},
doi = {10.1007/978-3-319-24800-4_14},
year = {2015},
date = {2015-01-01},
booktitle = {Current Trends in Web Engineering, 15th International Conference, ICWE 2015 Workshops (Revised Selected Papers)},
pages = {163--174},
publisher = {Springer},
abstract = {The rise of the web enabled new distribution channels like online stores and streaming platforms, offering a vast amount of different products. For helping customers finding products according to their taste on those platforms, recommender systems play an important role. Besides focusing on the computation of the recommendations itself, in literature the problem of a lack of data appropriate for research is discussed. In order to overcome this problem, we present a music recommendation system exploiting a dataset containing listening histories of users, who posted what they are listening to at the moment on the microblogging platform Twitter. As this dataset is updated daily, we propose a genetic algorithm, which allows the recommender system to adopt its input parameters to the extended dataset. In the evaluation part of this work, we benchmark the presented recommender system against two baseline approaches. We show that the performance of our proposed recommender is promising and clearly outperforms the baseline.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

The rise of the web enabled new distribution channels like online stores and streaming platforms, offering a vast amount of different products. For helping customers finding products according to their taste on those platforms, recommender systems play an important role. Besides focusing on the computation of the recommendations itself, in literature the problem of a lack of data appropriate for research is discussed. In order to overcome this problem, we present a music recommendation system exploiting a dataset containing listening histories of users, who posted what they are listening to at the moment on the microblogging platform Twitter. As this dataset is updated daily, we propose a genetic algorithm, which allows the recommender system to adopt its input parameters to the extended dataset. In the evaluation part of this work, we benchmark the presented recommender system against two baseline approaches. We show that the performance of our proposed recommender is promising and clearly outperforms the baseline.

@comment{gvdb14: paper at GvDB 2014, published via CEUR-WS.org. The url field holds the PDF only; the proceedings volume page is kept in the custom urlalt field, which standard styles ignore. ISSN and volume number, previously embedded in publisher, have their own fields.}
@inproceedings{gvdb14,
title = {Combining {Spotify} and {Twitter} Data for Generating a Recent and Public Dataset for Music Recommendation},
author = {Pichl, Martin and Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/gvdb14.pdf},
urlalt = {https://ceur-ws.org/Vol-1313/},
year = {2014},
date = {2014-01-01},
booktitle = {Proceedings of the 26th Workshop Grundlagen von Datenbanken (GvDB 2014), Ritten, Italy},
pages = {35--40},
publisher = {CEUR-WS.org},
series = {CEUR Workshop Proceedings},
volume = {1313},
issn = {1613-0073},
abstract = {In this paper, we present a dataset based on publicly available information. It contains listening histories of Spotify users, who posted what they are listening at the moment on the micro blogging platform Twitter. The dataset was derived using the Twitter Streaming API and is updated regularly. To show an application of this dataset, we implement and evaluate a pure collaborative filtering based recommender system. The performance of this system can be seen as a baseline approach for evaluating further, more sophisticated recommendation approaches. These approaches will be implemented and benchmarked against our baseline approach in future works.},
howpublished = {online},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

In this paper, we present a dataset based on publicly available information. It contains listening histories of Spotify users, who posted what they are listening at the moment on the micro blogging platform Twitter. The dataset was derived using the Twitter Streaming API and is updated regularly. To show an application of this dataset, we implement and evaluate a pure collaborative filtering based recommender system. The performance of this system can be seen as a baseline approach for evaluating further, more sophisticated recommendation approaches. These approaches will be implemented and benchmarked against our baseline approach in future works.

@comment{fgcs: journal article, volume 31 (2014). The page range uses the double hyphen that BibTeX styles expect for an en dash.}
@article{fgcs,
title = {Guided Curation of Semistructured Data in Collaboratively-built Knowledge Bases},
author = {Gassler, Wolfgang and Zangerle, Eva and Specht, G{\"u}nther},
doi = {10.1016/j.future.2013.05.008},
year = {2014},
date = {2014-01-01},
journal = {Journal on Future Generation Computer Systems},
volume = {31},
pages = {111--119},
publisher = {Elsevier Science Publishers},
abstract = {The collaborative curation of semistructured knowledge has become a popular paradigm on the web and also within enterprises. In such knowledge bases a common structure of the stored information is crucial for providing efficient and precise search facilities. However, the task of refining, extending and homogenizing knowledge and its structure is very complex. In this article we present two paradigms for the simplification of this task by providing guidance mechanisms to the user. Both paradigms aim at combining the power of automated extraction algorithms with the semantic awareness of human users to accomplish this refinement task.},
note = {impact factor 1.978.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

The collaborative curation of semistructured knowledge has become a popular paradigm on the web and also within enterprises. In such knowledge bases a common structure of the stored information is crucial for providing efficient and precise search facilities. However, the task of refining, extending and homogenizing knowledge and its structure is very complex. In this article we present two paradigms for the simplification of this task by providing guidance mechanisms to the user. Both paradigms aim at combining the power of automated extraction algorithms with the semantic awareness of human users to accomplish this refinement task.

@comment{sac14: paper at the 29th ACM Symposium on Applied Computing. The quoted phrase in the title uses matching LaTeX quotes and is brace-protected so its casing survives sentence-casing styles; percent signs in the abstract are escaped.}
@inproceedings{sac14,
title = {{``Sorry, I was hacked''}: A Classification of Compromised {Twitter} Accounts},
author = {Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/sac14.pdf},
doi = {10.1145/2554850.2554894},
year = {2014},
date = {2014-01-01},
booktitle = {Proceedings of the 29th ACM Symposium on Applied Computing},
pages = {587--593},
publisher = {ACM},
address = {Gyeongju, Korea},
abstract = {Online social networks like Facebook or Twitter have become powerful information diffusion platforms as they have attracted hundreds of millions of users. The possibility of reaching millions of users within these networks not only attracted standard users, but also cyber-criminals who abuse the networks by spreading spam. This is accomplished by either creating fake accounts, bots, cyborgs or by hacking and compromising accounts. Compromised accounts are subsequently used to spread spam in the name of their legitimate owner. This work sets out to investigate how Twitter users react to having their account hacked and how they deal with compromised accounts.
We crawled a data set of tweets in which users state that their account was hacked and subsequently performed a supervised classification of these tweets based on the reaction and behavior of the respective user. We find that 27.30\% of the analyzed Twitter users change to a new account once their account was hacked. 50.91\% of all users either state that they were hacked or apologize for any unsolicited tweets or direct messages.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Online social networks like Facebook or Twitter have become powerful information diffusion platforms as they have attracted hundreds of millions of users. The possibility of reaching millions of users within these networks not only attracted standard users, but also cyber-criminals who abuse the networks by spreading spam. This is accomplished by either creating fake accounts, bots, cyborgs or by hacking and compromising accounts. Compromised accounts are subsequently used to spread spam in the name of their legitimate owner. This work sets out to investigate how Twitter users react to having their account hacked and how they deal with compromised accounts.
We crawled a data set of tweets in which users state that their account was hacked and subsequently performed a supervised classification of these tweets based on the reaction and behavior of the respective user. We find that 27.30% of the analyzed Twitter users change to a new account once their account was hacked. 50.91% of all users either state that they were hacked or apologize for any unsolicited tweets or direct messages.

@comment{cybercrime14: workshop paper co-located with WebSci14. The proper noun in the title is brace-protected against sentence-casing; the umlaut is written as a BibTeX special character.}
@inproceedings{cybercrime14,
title = {Cybercrime on {Twitter}: Shifting the User Back into Focus},
author = {Zangerle, Eva and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/cybercrime14.pdf},
year = {2014},
date = {2014-01-01},
booktitle = {Proceedings of the WebScience Cybercrime / Cyberwar Workshop, co-located with WebSci14},
abstract = {The microblogging platform Twitter has not only gained hundreds of millions of users throughout the last years, also cyber-criminals have been attracted to Twitter by the sheer volume of users engaging on the platform. This lead to multiple forms of fraud on Twitter, which in turn has also attracted academia and triggered a series of significant scientific contributions dedicated to multiple different aspects related to cybercrime on Twitter. However, we think that there are still open issues which remain to be tackled. This paper sets out to highlight missing pieces required to understand how cybercrime affects users on the Twitter platform and calls for shifting the user into the focus of research.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

The microblogging platform Twitter has not only gained hundreds of millions of users throughout the last years, also cyber-criminals have been attracted to Twitter by the sheer volume of users engaging on the platform. This lead to multiple forms of fraud on Twitter, which in turn has also attracted academia and triggered a series of significant scientific contributions dedicated to multiple different aspects related to cybercrime on Twitter. However, we think that there are still open issues which remain to be tackled. This paper sets out to highlight missing pieces required to understand how cybercrime affects users on the Twitter platform and calls for shifting the user into the focus of research.

@comment{ismm14: paper at the 1st ACM workshop on Internet-Scale Multimedia Management. Hash signs in the title and abstract are escaped because they are LaTeX special characters; the proper noun in the title is brace-protected.}
@inproceedings{ismm14,
title = {\#nowplaying Music Dataset: Extracting Listening Behavior from {Twitter}},
author = {Zangerle, Eva and Pichl, Martin and Gassler, Wolfgang and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/ismm14.pdf},
doi = {10.1145/2661714.2661719},
year = {2014},
date = {2014-01-01},
booktitle = {Proceedings of the 1st ACM International Workshop on Internet-Scale Multimedia Management},
pages = {21--26},
publisher = {ACM},
address = {Orlando, Florida, USA},
series = {ISMM '14},
abstract = {The extraction of information from online social networks has become popular in both industry and academia as these data sources allow for innovative applications. However, in the area of music recommender systems and music information retrieval, respective data is hardly exploited. In this paper, we present the \#nowplaying dataset, which leverages social media for the creation of a diverse and constantly updated dataset, which describes the music listening behavior of users. For the creation of the dataset, we rely on Twitter, which is frequently facilitated for posting which music the respective user is currently listening to. From such tweets, we extract track and artist information and further metadata. The dataset currently comprises 49 million listening events, 144,011 artists, 1,346,203 tracks and 4,150,615 users which makes it considerably larger than existing datasets.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

The extraction of information from online social networks has become popular in both industry and academia as these data sources allow for innovative applications. However, in the area of music recommender systems and music information retrieval, respective data is hardly exploited. In this paper, we present the #nowplaying dataset, which leverages social media for the creation of a diverse and constantly updated dataset, which describes the music listening behavior of users. For the creation of the dataset, we rely on Twitter, which is frequently facilitated for posting which music the respective user is currently listening to. From such tweets, we extract track and artist information and further metadata. The dataset currently comprises 49 million listening events, 144,011 artists, 1,346,203 tracks and 4,150,615 users which makes it considerably larger than existing datasets.

@comment{snam: journal article in Social Network Analysis and Mining 3(4), 2013. The page range uses the double hyphen that BibTeX styles expect for an en dash.}
@article{snam,
title = {On the impact of text similarity functions on hashtag recommendations in microblogging environments},
author = {Zangerle, Eva and Gassler, Wolfgang and Specht, G{\"u}nther},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/snam.pdf},
doi = {10.1007/s13278-013-0108-x},
issn = {1869-5450},
year = {2013},
date = {2013-01-01},
journal = {Social Network Analysis and Mining},
volume = {3},
number = {4},
pages = {889--898},
publisher = {Springer Vienna},
abstract = {Microblogging applications such as Twitter are experiencing tremendous success. Microblog users utilize hashtags to categorize posted messages which aim at bringing order to the myriads of microblog messages. However, the percentage of messages incorporating hashtags is small and the used hashtags are very heterogeneous as hashtags may be chosen freely and may consist of any arbitrary combination of characters. This heterogeneity and the lack of use of hashtags lead to significant drawbacks in regards to the search functionality as messages are not categorized in a homogeneous way. In this paper, we present an approach for the recommendation of hashtags suitable for the message the user currently enters which aims at creating a more homogeneous set of hashtags. Furthermore, we present a detailed study on how the similarity measures used for the computation of recommendations influence the final set of recommended hashtags.},
note = {(The final publication is available at link.springer.com.)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}

Microblogging applications such as Twitter are experiencing tremendous success. Microblog users utilize hashtags to categorize posted messages which aim at bringing order to the myriads of microblog messages. However, the percentage of messages incorporating hashtags is small and the used hashtags are very heterogeneous as hashtags may be chosen freely and may consist of any arbitrary combination of characters. This heterogeneity and the lack of use of hashtags lead to significant drawbacks in regards to the search functionality as messages are not categorized in a homogeneous way. In this paper, we present an approach for the recommendation of hashtags suitable for the message the user currently enters which aims at creating a more homogeneous set of hashtags. Furthermore, we present a detailed study on how the similarity measures used for the computation of recommendations influence the final set of recommended hashtags.

@comment{dbspektrum: article in Datenbank-Spektrum, volume 13, number 3 (2013), page 239.}
@article{dbspektrum,
  author    = {Zangerle, Eva},
  title     = {Dissertationen: Leveraging Recommender Systems for the Creation and Maintenance of Structure within Collaborative Social Media Platforms},
  journal   = {Datenbank-Spektrum},
  volume    = {13},
  number    = {3},
  pages     = {239},
  year      = {2013},
  date      = {2013-01-01},
  doi       = {10.1007/s13222-013-0138-6},
  abstract  = {During the last decade, the web transformed from a web of information consumers to a web of information producers. In particular, the advent of online social media platforms is hugely responsible for this shift as people now actively post information in knowledge bases, engage in online communities and contribute to social media platforms. Hence, a vast amount of new information is produced each day. This publicly available data is an invaluable source of information which still is to be fully exploited. Due to the broad span of users of such systems (originating from different cultures and backgrounds, speaking different languages, etc.), the information provided features a limited amount of common structure, as e.g., objects are named differently and information is structured differently. This is a severe constraint in regards to the performance of search facilities. This thesis proposes to facilitate recommender systems to create and maintain a common structure within collaborative social media platforms aiming at improving search performance. For this purpose, two different recommender systems for two showcase platforms are presented. The first recommender system provides recommendations for structuring information within a semistructured information system whereas the second recommender systems is a hashtag recommender system for microblogging services.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}

During the last decade, the web transformed from a web of information consumers to a web of information producers. In particular, the advent of online social media platforms is hugely responsible for this shift as people now actively post information in knowledge bases, engage in online communities and contribute to social media platforms. Hence, a vast amount of new information is produced each day. This publicly available data is an invaluable source of information which still is to be fully exploited. Due to the broad span of users of such systems (originating from different cultures and backgrounds, speaking different languages, etc.), the information provided features a limited amount of common structure, as e.g., objects are named differently and information is structured differently. This is a severe constraint in regards to the performance of search facilities. This thesis proposes to facilitate recommender systems to create and maintain a common structure within collaborative social media platforms aiming at improving search performance. For this purpose, two different recommender systems for two showcase platforms are presented. The first recommender system provides recommendations for structuring information within a semistructured information system whereas the second recommender systems is a hashtag recommender system for microblogging services.

@comment{evaphd: PhD thesis, University of Innsbruck, 2013.}
@phdthesis{evaphd,
  author    = {Zangerle, Eva},
  title     = {Leveraging Recommender Systems for the Creation and Maintenance of Structure within Collaborative Social Media Platforms},
  school    = {University of Innsbruck},
  year      = {2013},
  date      = {2013-01-01},
  url       = {https://www.evazangerle.at/wp-content/uploads/2017/06/evaphd.pdf},
  abstract  = {During the last decade, the web transformed from a web of information consumers to a web of information producers. In particular, the advent of online social media platforms is hugely responsible for this shift as people now actively post information in knowledge bases, engage in online communities and contribute to social media platforms. Hence, a vast amount of new information is produced each day. This publicly available data is an invaluable source of information which still is to be fully exploited. Due to the broad span of users of such systems (originating from different cultures and backgrounds, speaking different languages, etc.), the information provided features a limited amount of common structure, as e.g., objects are named differently and information is structured differently. This is a severe constraint in regards to the performance of search facilities. This thesis proposes to facilitate recommender systems to create and maintain a common structure within collaborative social media platforms aiming at improving search performance. For this purpose, two different recommender systems for two showcase platforms are presented. The first recommender system provides recommendations for structuring information within a semistructured information system whereas the second recommender systems is a hashtag recommender system for microblogging services.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {phdthesis},
}

During the last decade, the web transformed from a web of information consumers to a web of information producers. In particular, the advent of online social media platforms is hugely responsible for this shift as people now actively post information in knowledge bases, engage in online communities and contribute to social media platforms. Hence, a vast amount of new information is produced each day. This publicly available data is an invaluable source of information which still is to be fully exploited. Due to the broad span of users of such systems (originating from different cultures and backgrounds, speaking different languages, etc.), the information provided features a limited amount of common structure, as e.g., objects are named differently and information is structured differently. This is a severe constraint in regards to the performance of search facilities. This thesis proposes to facilitate recommender systems to create and maintain a common structure within collaborative social media platforms aiming at improving search performance. For this purpose, two different recommender systems for two showcase platforms are presented. The first recommender system provides recommendations for structuring information within a semistructured information system whereas the second recommender systems is a hashtag recommender system for microblogging services.

% Workshop paper: deriving a data set for music recommendation from Twitter microposts.
@inproceedings{msm12,
title = {Exploiting {Twitter's} Collective Knowledge for Music Recommendations},
author = {Eva Zangerle and Wolfgang Gassler and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/msm12.pdf},
year = {2012},
date = {2012-04-01},
booktitle = {Proceedings, 2nd Workshop on Making Sense of Microposts (\#MSM2012): Big things come in small packages, Lyon, France, 16 April 2012},
pages = {14--17},
abstract = {Twitter is the largest source of public opinion and also contains a vast amount of information about its users' music favors or listening behaviour. However, this source has not been exploited for the recommendation of music yet. In this paper, we present how Twitter can be facilitated for the creation of a data set upon which music recommendations can be computed. The data set is based on microposts which were automatically generated by music player software or posted by users and may also contain further information about audio tracks.},
note = {Proceedings available online at https://ceur-ws.org/Vol-838/},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Twitter is the largest source of public opinion and also contains a vast amount of information about its users’ music favors or listening behaviour. However, this source has not been exploited for the recommendation of music yet. In this paper, we present how Twitter can be facilitated for the creation of a data set upon which music recommendations can be computed. The data set is based on microposts which were automatically generated by music player software or posted by users and may also contain further information about audio tracks.

% Book chapter: avoiding and overcoming structure heterogeneity in semistructured collaborative information systems.
@incollection{collsemweb,
title = {Dealing with Structure Heterogeneity in Semantic Collaborative Environments},
author = {Eva Zangerle and Wolfgang Gassler},
editor = {Brueggemann, Stefan and d'Amato, Claudia},
doi = {10.4018/978-1-4666-0894-8},
year = {2012},
date = {2012-01-01},
booktitle = {Collaboration and the Semantic Web: Social Networks, Knowledge Networks and Knowledge Resources},
publisher = {IGI Publishers},
address = {Hershey, Pennsylvania, USA},
abstract = {The creation of content within semistructured, collaborative information systems imposes the problem of having to deal with very heterogeneous schemata. This is due to the fact that the semistructured paradigm does not restrict the user in his choice of nomenclatures for the data he intends to store within the information system. As many users participate in the creation of data, the structure of this data is very heterogeneous. In this chapter the authors discuss two main movements that aim at dealing with heterogeneity. The first approach is concerned with efficiently avoiding structure heterogeneity within collaborative information systems by providing the users with suitable recommendations for an aligned schema during the insertion process. The second approach is mainly focussing on overcoming structure heterogeneity by providing efficient means for querying heterogeneous data.},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}

The creation of content within semistructured, collaborative information systems imposes the problem of having to deal with very heterogeneous schemata. This is due to the fact that the semistructured paradigm does not restrict the user in his choice of nomenclatures for the data he intends to store within the information system. As many users participate in the creation of data, the structure of this data is very heterogeneous. In this chapter the authors discuss two main movements that aim at dealing with heterogeneity. The first approach is concerned with efficiently avoiding structure heterogeneity within collaborative information systems by providing the users with suitable recommendations for an aligned schema during the insertion process. The second approach is mainly focussing on overcoming structure heterogeneity by providing efficient means for querying heterogeneous data.

% Poster paper: SnoopyTagging, recommending contextualized tags to reduce tag-vocabulary heterogeneity.
@inproceedings{www12,
title = {{SnoopyTagging}: Recommending Contextualized Tags to Increase the Quality and Quantity of Meta-Information},
author = {Wolfgang Gassler and Eva Zangerle and Martin B{\"u}rgler and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/www12.pdf},
doi = {10.1145/2187980.2188102},
year = {2012},
date = {2012-01-01},
booktitle = {Proceedings of the 21st International Conference on the World Wide Web 2012, Lyon, France (Poster)},
pages = {511--512},
abstract = {Current mass-collaboration platforms use tags to annotate and categorize resources enabling effective search capabilities. However, as tags are freely chosen keywords, the resulting tag vocabulary is very heterogeneous. Another shortcoming of simple tags is that they do not allow for a specification of context to create meaningful metadata. In this paper we present the SnoopyTagging approach which supports the user in the process of creating contextualized tags while at the same time decreasing the heterogeneity of the tag vocabulary by facilitating intelligent self-learning recommendation algorithms.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Current mass-collaboration platforms use tags to annotate and categorize resources enabling effective search capabilities. However, as tags are freely chosen keywords, the resulting tag vocabulary is very heterogeneous. Another shortcoming of simple tags is that they do not allow for a specification of context to create meaningful metadata. In this paper we present the SnoopyTagging approach which supports the user in the process of creating contextualized tags while at the same time decreasing the heterogeneity of the tag vocabulary by facilitating intelligent self-learning recommendation algorithms.

% Poster: analysis of Wikipedia infoboxes and the SnoopBox key-recommendation prototype.
@misc{ht11,
title = {Key Recommendations for Infoboxes in {Wikipedia}},
author = {Alexander Larcher and Eva Zangerle and Wolfgang Gassler and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/ht11.pdf},
year = {2011},
date = {2011-01-01},
abstract = {Wikipedia infoboxes represent the semistructured part of data inside a Wikipedia article. The creation of infoboxes is based on the use of templates, which provide the user with a predefined amount of keys. These keys are mainly mandatory and have to be specified in the corresponding infobox along with the corresponding values. This imposes significant limits on the user's actions. We analyzed the development of infoboxes from about 50,000 articles. Furthermore, the templates themselves were analyzed, focusing on ambiguity and multiple usage of keys. Based on these results we introduce a prototype for infobox creation called SnoopBox, which is based on a key recommendation system. This system supports the user creating infoboxes and avoids the tricky use of templates. Both the results of the evaluation of this program and the analysis of infoboxes in Wikipedia are presented.},
howpublished = {Website of the 22nd ACM Conference on Hypertext and Hypermedia},
note = {Poster Presentation, available online at https://www.ht2011.org},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}

Wikipedia infoboxes represent the semistructured part of data inside a Wikipedia article. The creation of infoboxes is based on the use of templates, which provide the user with a predefined amount of keys. These keys are mainly mandatory and have to be specified in the corresponding infobox along with the corresponding values. This imposes significant limits on the user’s actions. We analyzed the development of infoboxes from about 50,000 articles. Furthermore, the templates themselves were analyzed, focusing on ambiguity and multiple usage of keys. Based on these results we introduce a prototype for infobox creation called SnoopBox, which is based on a key recommendation system. This system supports the user creating infoboxes and avoids the tricky use of templates. Both the results of the evaluation of this program and the analysis of infoboxes in Wikipedia are presented.

% Workshop paper: recommending hashtags to Twitter users during the creation process.
@inproceedings{umap11,
title = {Recommending \#-tags in {Twitter}},
author = {Eva Zangerle and Wolfgang Gassler and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/sasweb11.pdf},
year = {2011},
date = {2011-01-01},
booktitle = {Proceedings of the Workshop on Semantic Adaptive Social Web 2011 in connection with the 19th International Conference on User Modeling, Adaptation and Personalization, UMAP 2011, Gerona, Spain},
pages = {67--78},
publisher = {CEUR-WS.org},
series = {CEUR Workshop Proceedings},
volume = {730},
issn = {1613-0073},
abstract = {Twitter, currently the most popular microblogging tool available, is used to publish more than 140,000,000 messages a day. Many users use hashtags to categorize their tweets. However, hashtags are not restricted in any way in terms of usage or syntax which leads to a very heterogeneous set of hashtags occurring in the Twitter universe and therefore decreases the search capabilities. In this paper, we present an approach for the recommendation of highly appropriate hashtags to the user during the creation process. The recommendations aim at encouraging the user to (i) use hashtags at all, (ii) use more appropriate hashtags and (iii) avoid the usage of synonymous hashtags. Therefore, the vocabulary of hashtags becomes more homogeneous regarding both syntax and semantics.},
note = {Proceedings available online at https://ceur-ws.org/Vol-730/},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

Twitter, currently the most popular microblogging tool available, is used to publish more than 140,000,000 messages a day. Many users use hashtags to categorize their tweets. However, hashtags are not restricted in any way in terms of usage or syntax which leads to a very heterogeneous set of hashtags occurring in the Twitter universe and therefore decreases the search capabilities. In this paper, we present an approach for the recommendation of highly appropriate hashtags to the user during the creation process. The recommendations aim at encouraging the user to (i) use hashtags at all, (ii) use more appropriate hashtags and (iii) avoid the usage of synonymous hashtags. Therefore, the vocabulary of hashtags becomes more homogeneous regarding both syntax and semantics.

% Workshop paper: SpiderStore, a node-centric main-memory layout for storing large RDF data sets.
@inproceedings{gvdb11,
title = {{SpiderStore}: A Native Main Memory Approach for Graph Storage},
author = {Robert Binna and Eva Zangerle and Wolfgang Gassler and Dominic Pacher and G{\"u}nther Specht},
url = {https://www.evazangerle.at/wp-content/uploads/2017/06/gvdb11.pdf},
year = {2011},
date = {2011-01-01},
booktitle = {Proceedings of the 23rd Workshop Grundlagen von Datenbanken (GvDB 2011), Obergurgl, Austria},
publisher = {CEUR-WS.org},
series = {CEUR Workshop Proceedings},
volume = {733},
issn = {1613-0073},
abstract = {The ever increasing amount of linked open data results in a demand for high performance graph databases. In this paper we therefore introduce a memory layout which is tailored to the storage of large RDF data sets in main memory. We present the memory layout SpiderStore. This layout features a node centric design which is in contrast to the prevailing systems using triple focused approaches. The benefit of this design is a native mapping between the nodes of a graph onto memory locations connected to each other. Based on this native mapping an addressing schema which facilitates relative addressing together with a snapshot mechanism is presented. Finally, a performance evaluation, which demonstrates the capabilities of the SpiderStore memory layout, is performed using an RDF data set consisting of about 190 million triples.},
note = {Proceedings available online at https://ceur-ws.org/Vol-733/},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}

The ever-increasing amount of linked open data results in a demand for high-performance graph databases. In this paper we therefore introduce a memory layout which is tailored to the storage of large RDF data sets in main memory. We present the memory layout SpiderStore. This layout features a node-centric design which is in contrast to the prevailing systems using triple-focused approaches. The benefit of this design is a native mapping between the nodes of a graph onto memory locations connected to each other. Based on this native mapping an addressing schema which facilitates relative addressing together with a snapshot mechanism is presented. Finally, a performance evaluation, which demonstrates the capabilities of the SpiderStore memory layout, is performed using an RDF data set consisting of about 190 million triples.