# Datensatzvorbereitung für ein kollaboratives Empfehlungssystem

In [1]:
import pandas as pd

# Load the user ratings from the CSV file in the working directory.
ratings = pd.read_csv("ratings.csv", encoding="latin-1")

## Schritt 1: Bewertungstabelle erzeugen

Da das kollaborative Empfehlungssystem nur mit den Bewertungen arbeitet, benötigen wir die Filminformationen nicht und brauchen sie auch nicht einzulesen. Wir arbeiten also nur mit der 'ratings'-Datei. <br/>
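Pandas bietet einen Befehl an, der aus den Zeilen der Datei genau die Tabellenform erzeugt, die wir benötigen: pivot_table. Dafür geben wir nur die Spaltennamen an, deren Werte anschließend die Zeilen und Spalten der Tabelle bilden sollen, sowie die Spalte, aus der die Tabelleneinträge stammen. Im folgenden Code beschränken wir uns außerdem auf die Filme mit einer movieId kleiner als 1000.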

In [2]:
# Keep only the ratings whose movieId is below 1000.
low_id_ratings = ratings.loc[ratings["movieId"] < 1000]

# Reshape the long rating list into a user x movie table: one row per userId,
# one column per movieId, cells holding the rating. Combinations without a
# rating become NaN; pivot_table's default aggregation (mean) applies if a
# user/movie pair occurs more than once.
ergebnis = low_id_ratings.pivot_table(
    values="rating",
    index="userId",
    columns="movieId",
)
ergebnis

movieId,1,2,3,4,5,6,7,8,9,10,...,989,990,991,992,993,994,996,997,998,999
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,3.5,,,,,,,,,,...,,,,,,,,,,
3,4.0,,,,,,,,,,...,,,,,,,,,,
4,3.0,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162537,,,,,,,,,,,...,,,,,,,,,,
162538,2.0,,,,,,,,,,...,,,,,,,,,,
162539,,,,,,,,,,,...,,,,,,,,,,
162540,,,,,,,,,,,...,,,,,,,,,,


## Schritt 2: NaN-Werte

In der Ausgabe siehst du, dass viele Bewertungen fehlen (NaN). Für die weitere Verarbeitung benötigen wir eine vollständige Tabelle. Daher müssen wir die NaN-Werte zunächst durch Lückenfüller ersetzen. <br/><br/>
Diskutiere, welche Lückenfüller du verwenden würdest und bearbeite den Code, sodass diese eingesetzt werden.<br/>
Zu einem späteren Zeitpunkt überlegen wir, welche Möglichkeiten es noch gibt.

In [3]:
# Placeholder written into every cell that has no rating (NaN).
# Change this value to experiment with other placeholders.
placeholder = 0

# Replace the missing ratings directly in the existing table.
ergebnis.fillna(value=placeholder, inplace=True)
ergebnis

movieId,1,2,3,4,5,6,7,8,9,10,...,989,990,991,992,993,994,996,997,998,999
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162537,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162538,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162539,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
