% Conference paper describing the authors' X-Sim co-similarity framework as
% applied to the LSHTC2 large-scale hierarchical text categorization challenge.
% All accented characters use LaTeX escapes so the entry stays ASCII-only.
@inproceedings{grimal2011marami,
  author    = {Grimal, Cl{\'e}ment and Bisson, Gilles},
  title     = {Using a co-similarity approach on a large scale text categorization task},
  booktitle = {Seconde conf{\'e}rence sur les Mod{\`e}les et l'Analyse des R{\'e}seaux : Approches Math{\'e}matiques et Informatique},
  year      = {2011},
  abstract  = {This paper presents a framework we developed for the second Large Scale Hierarchical Text Categorization challenge LSHTC2. The main idea is to propose a method allowing to deal with the terms variability among the categories in order to be able to find similarities between collections of documents belonging to the same category but having few common terms. Thus, we used a co-similarity based approach, named X-Sim, that we introduced in previous work. Nevertheless, as this co-similarity methods are not highly scalable, we need to implement a ``divide and conquer'' approach to split the categories into a set of clusters containing semantically related documents. This lead to a two-stage strategy for the document categorization: first, we decide in which cluster the test document belongs, and then inside the elected cluster, we perform the final categorization that is based on our co-similarity approach.},
}