summaryrefslogtreecommitdiff
path: root/SI/Resource/Fundamentals of Data Mining
diff options
context:
space:
mode:
authorTheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com>2024-04-29 22:06:12 -0400
committerTheSiahxyz <164138827+TheSiahxyz@users.noreply.github.com>2024-04-29 22:06:12 -0400
commit4d53fa14ee0cd615444aca6f6ba176e0ccc1b5be (patch)
tree4d9f0527d9e6db4f92736ead0aa9bb3f840a0f89 /SI/Resource/Fundamentals of Data Mining
init
Diffstat (limited to 'SI/Resource/Fundamentals of Data Mining')
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Ch.3.md10
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Apriori.md10
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Attributes of Mixed Type.md7
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Binary.md12
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Categorical.md7
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Compare and Contrast.md27
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Complexity.md25
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/DBSCAN.md12
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Data Matrix and Dissimilarity Matrix.md7
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Density-based Clustering.md21
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Dissimilarity.md12
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Distance functions.md24
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Distance measures (mixted types of attributes).md15
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/F-measure.md9
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/FP-growth.md21
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Hierarchical Clustering.md54
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/K-Means.md23
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/K-Medians.md25
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/K-Medoids.md45
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/K-Modes.md27
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/K-means++.md11
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Kernel K-Means.md39
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/NMI.md21
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Nominal.md9
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/OPTICS.md35
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Ordinal.md7
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/SSE.md23
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Target encoding.md8
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/Z-score.md20
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/attributes.md14
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/clustering algorithms.md21
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/clustering evaluation.md9
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/external.md42
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/internal.md8
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/mixted types of attributes.md21
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/pattern discovery.md27
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Content/variants.md16
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Midterm - CS663.md69
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.37.59@2x.pngbin0 -> 14661 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.41.47@2x.pngbin0 -> 13703 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.09.20@2x.pngbin0 -> 83937 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.16.28@2x.pngbin0 -> 107069 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.17.24@2x.pngbin0 -> 127849 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.18.06@2x.pngbin0 -> 96030 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.11@2x.pngbin0 -> 228586 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.54@2x.pngbin0 -> 133244 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.19@2x.pngbin0 -> 100760 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.36@2x.pngbin0 -> 121954 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.00@2x.pngbin0 -> 104666 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.12@2x.pngbin0 -> 141125 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.28.55@2x.pngbin0 -> 142883 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.43.35@2x.pngbin0 -> 46258 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 15.34.07@2x.pngbin0 -> 221097 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 21.42.19@2x.pngbin0 -> 127009 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.06.43@2x.pngbin0 -> 332972 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.07.05@2x.pngbin0 -> 727462 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.11.21@2x.pngbin0 -> 149960 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.30@2x.pngbin0 -> 189801 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.45@2x.pngbin0 -> 207011 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.07@2x.pngbin0 -> 384982 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.23@2x.pngbin0 -> 256315 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.02@2x.pngbin0 -> 289590 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.23@2x.pngbin0 -> 309364 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.37@2x.pngbin0 -> 266549 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.59@2x.pngbin0 -> 775523 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.22.49@2x.pngbin0 -> 214308 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.23.05@2x.pngbin0 -> 313525 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.30@2x.pngbin0 -> 309693 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.51@2x.pngbin0 -> 255899 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x 1.pngbin0 -> 87221 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x.pngbin0 -> 87221 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.19@2x.pngbin0 -> 253335 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.40@2x.pngbin0 -> 130279 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.59.49@2x.pngbin0 -> 201246 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.00.03@2x.pngbin0 -> 237682 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.08.52@2x.pngbin0 -> 139686 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.04@2x.pngbin0 -> 119736 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.16@2x.pngbin0 -> 97910 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.06@2x.pngbin0 -> 90739 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.19@2x.pngbin0 -> 114401 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.34@2x.pngbin0 -> 111261 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.31@2x.pngbin0 -> 147451 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.42@2x.pngbin0 -> 152625 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.56@2x.pngbin0 -> 160566 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.11@2x.pngbin0 -> 151875 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.25@2x.pngbin0 -> 158098 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.09@2x.pngbin0 -> 110162 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.19@2x.pngbin0 -> 144234 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.12@2x.pngbin0 -> 125325 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.29@2x.pngbin0 -> 144897 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.47@2x.pngbin0 -> 139410 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 22.44.57.pngbin0 -> 22513 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.53.56@2x.pngbin0 -> 199291 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.32@2x.pngbin0 -> 213339 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.44@2x.pngbin0 -> 261277 bytes
-rw-r--r--SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.55.00@2x.pngbin0 -> 202512 bytes
96 files changed, 793 insertions, 0 deletions
diff --git a/SI/Resource/Fundamentals of Data Mining/Ch.3.md b/SI/Resource/Fundamentals of Data Mining/Ch.3.md
new file mode 100644
index 0000000..f456f74
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Ch.3.md
@@ -0,0 +1,10 @@
+---
+id: Ch.3
+aliases: []
+tags: []
+---
+
+[[convex]]: in the clustering [[concave]]: not in the clustering [[K-Means]]:
+can only detect clusters that are linearly separable
+
+- in a higher-dimensional space, the chance that a separating line (hyperplane) exists increases
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Apriori.md b/SI/Resource/Fundamentals of Data Mining/Content/Apriori.md
new file mode 100644
index 0000000..450a899
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Apriori.md
@@ -0,0 +1,10 @@
+---
+id: Apriori
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-25 at 16.08.52@2x.png]] ![[CleanShot 2023-10-25 at
+16.09.04@2x.png]] ![[CleanShot 2023-10-25 at 16.09.16@2x.png]] ![[CleanShot
+2023-10-25 at 16.10.06@2x.png]] ![[CleanShot 2023-10-25 at 16.10.19@2x.png]]
+![[CleanShot 2023-10-25 at 16.10.34@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Attributes of Mixed Type.md b/SI/Resource/Fundamentals of Data Mining/Content/Attributes of Mixed Type.md
new file mode 100644
index 0000000..4f3149d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Attributes of Mixed Type.md
@@ -0,0 +1,7 @@
+---
+id: Attributes of Mixed Type
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-23 at 18.43.35@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Binary.md b/SI/Resource/Fundamentals of Data Mining/Content/Binary.md
new file mode 100644
index 0000000..996865c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Binary.md
@@ -0,0 +1,12 @@
+---
+id: Binary
+aliases:
+ - Example: Dissimilarity between Asymmetric Binary Variables
+tags: []
+---
+
+![[CleanShot 2023-10-23 at 18.19.11@2x.png]]
+
+### Example: Dissimilarity between Asymmetric Binary Variables
+
+![[CleanShot 2023-10-23 at 18.19.54@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Categorical.md b/SI/Resource/Fundamentals of Data Mining/Content/Categorical.md
new file mode 100644
index 0000000..6d1ec43
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Categorical.md
@@ -0,0 +1,7 @@
+---
+id: Categorical
+aliases: []
+tags: []
+---
+
+[[Nominal]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Compare and Contrast.md b/SI/Resource/Fundamentals of Data Mining/Content/Compare and Contrast.md
new file mode 100644
index 0000000..d72dd09
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Compare and Contrast.md
@@ -0,0 +1,27 @@
+---
+id: Compare and Contrast
+aliases:
+ - clustering algorithms
+tags:
+ - Compare-and-Contrast
+---
+
+## [[clustering algorithms]]
+
+- [[K-Means]] vs [[K-Medoids]]
+ - In _K-means_ algorithm, they choose means as the centroids but in the
+ _K-medoids_, data points are chosen to be the medoids[^1].
+- [[K-Means]] vs [[K-Medians]]
+
+| K-Means | K-Medians |
+| ---------------------------------------------------------- | --------------------------------------------- |
+| The center is not necessarily one of the input data points | Centers will be chosen from data points |
+| Not flexible | More flexible |
+| Not immune to noise and outliers | More robust to noise and outliers |
+| Minimize the sum of squared Euclidean distance             | Minimize a sum of pairwise dissimilarities    |
+
+[^1]:
+ Medoids are **representative objects of a data set or a cluster within a
+ data set whose sum of dissimilarities to all the objects in the cluster is
+ minimal**. Medoids are similar in concept to means or centroids, but medoids are
+ always restricted to be members of the data set.
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Complexity.md b/SI/Resource/Fundamentals of Data Mining/Content/Complexity.md
new file mode 100644
index 0000000..e73595a
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Complexity.md
@@ -0,0 +1,25 @@
+---
+id: Complexity
+aliases:
+ - Computational/Time Complexity
+tags: []
+---
+
+## Computational/Time Complexity
+
+- K-Medoids:
+ - PAM: $O(K(n - K)^2)$
+- Kernel K-Means:
+ - Computational complexity (time and space) is higher than K-Means
+ - Need to compute and store n x n kernel matrix generated from the kernel
+ function on the original data, where n is the number of points
+- Hierarchical Clustering:
+ - Agglomerative Clustering
+ - Time complexity: $O(n^2)$
+ - Algorithmic Complexity: $O(m^2 \log m)$
+- Density-based Clustering:
+ - DBSCAN:
+ - Computational complexity: $O(n \log n)$
+ - worst case: $O(n^2)$
+ - OPTICS:
+ - Complexity: $O(N \log N)$
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/DBSCAN.md b/SI/Resource/Fundamentals of Data Mining/Content/DBSCAN.md
new file mode 100644
index 0000000..00e737d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/DBSCAN.md
@@ -0,0 +1,12 @@
+---
+id: DBSCAN
+aliases:
+ - DBSCAN: A Density-Based Spatial Clustering Algorithm
+tags: []
+---
+
+## DBSCAN: A Density-Based Spatial Clustering Algorithm
+
+![[CleanShot 2023-10-24 at 22.21.02@2x.png]] ![[CleanShot 2023-10-24 at
+22.21.23@2x.png]] ![[CleanShot 2023-10-24 at 22.21.37@2x.png]] ![[CleanShot
+2023-10-24 at 22.21.59@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Data Matrix and Dissimilarity Matrix.md b/SI/Resource/Fundamentals of Data Mining/Content/Data Matrix and Dissimilarity Matrix.md
new file mode 100644
index 0000000..7f299d1
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Data Matrix and Dissimilarity Matrix.md
@@ -0,0 +1,7 @@
+---
+id: Data Matrix and Dissimilarity Matrix
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-23 at 18.09.20@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Density-based Clustering.md b/SI/Resource/Fundamentals of Data Mining/Content/Density-based Clustering.md
new file mode 100644
index 0000000..d5e11fe
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Density-based Clustering.md
@@ -0,0 +1,21 @@
+---
+id: Density-based Clustering
+aliases:
+ - Density-Based Clustering Methods
+tags: []
+---
+
+## Density-Based Clustering Methods
+
+- Clustering based on density (a **local** cluster criterion), such as
+ density-connected points
+- Major features:
+ - Discover clusters of **arbitrary** shape
+ - Handle noise
+ - One scan (only examine the local region to justify density)
+ - Need density parameters as termination condition
+- Several interesting studies:
+ - <u>[[DBSCAN]]</u>: Ester, et al.
+ - <u>[[OPTICS]]</u>: Ankerst, et al.
+ - DENCLUE: Hinneburg & D. Keim
+ - CLIQUE: Agrawal, et al.
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Dissimilarity.md b/SI/Resource/Fundamentals of Data Mining/Content/Dissimilarity.md
new file mode 100644
index 0000000..be8891f
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Dissimilarity.md
@@ -0,0 +1,12 @@
+---
+id: Dissimilarity
+aliases: []
+tags: []
+---
+
+- Dissimilarity (or distance) measure
+ - [[Distance functions#Numeric|Numerical measure]] of how different two data
+ objects are
+ - **In some sense, the inverse of similarity**: The lower, the more alike
+ - Minimum dissimilarity is often 0 (i.e., completely similar)
+ - Range [0, 1] or [0, $\infty$), depending on the definition
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Distance functions.md b/SI/Resource/Fundamentals of Data Mining/Content/Distance functions.md
new file mode 100644
index 0000000..0b1c658
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Distance functions.md
@@ -0,0 +1,24 @@
+---
+id: Distance functions
+aliases:
+ - Numeric
+tags: []
+---
+
+## Numeric
+
+### Minkowski distance
+
+![[CleanShot 2023-10-23 at 18.16.28@2x.png]]
+
+### Special Cases of Minkowski Distance
+
+![[CleanShot 2023-10-23 at 18.17.24@2x.png]]
+
+- Manhattan (or city block) distance
+- Euclidean distance
+- "supremum" distance
+
+### Example: Special Cases
+
+![[CleanShot 2023-10-23 at 18.18.06@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Distance measures (mixted types of attributes).md b/SI/Resource/Fundamentals of Data Mining/Content/Distance measures (mixted types of attributes).md
new file mode 100644
index 0000000..d97338b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Distance measures (mixted types of attributes).md
@@ -0,0 +1,15 @@
+---
+id: Distance measures (mixted types of attributes)
+aliases:
+ - Distance measures (mixted types of attributes)
+tags: []
+---
+
+## Distance measures ([[mixted types of attributes]])
+
+- [[Dissimilarity]] (or distance) measure
+ - [[Distance functions#Numeric|Numerical measure]] of how different two data
+ objects are
+ - **In some sense, the inverse of similarity**: The lower, the more alike
+ - Minimum dissimilarity is often 0 (i.e., completely similar)
+ - Range [0, 1] or [0, $\infty$), depending on the definition
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/F-measure.md b/SI/Resource/Fundamentals of Data Mining/Content/F-measure.md
new file mode 100644
index 0000000..a467817
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/F-measure.md
@@ -0,0 +1,9 @@
+---
+id: F-measure
+aliases: []
+tags: []
+---
+
+- F-Measure ![[CleanShot 2023-10-25 at 15.57.51@2x.png]] ![[CleanShot 2023-10-25
+at 15.58.04@2x.png]] ![[CleanShot 2023-10-25 at 15.58.19@2x.png]] ![[CleanShot
+2023-10-25 at 15.58.40@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/FP-growth.md b/SI/Resource/Fundamentals of Data Mining/Content/FP-growth.md
new file mode 100644
index 0000000..f533719
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/FP-growth.md
@@ -0,0 +1,21 @@
+---
+id: FP-growth
+aliases: []
+tags: []
+---
+
+• You can expect to ‘draw’ the fp-tree using a text-based format as follows: A:3
+
+|
+
+B:3.   C:1
+
+|            |      \
+
+C:2      E:1   D:1
+
+![[CleanShot 2023-10-25 at 16.11.31@2x.png]] ![[CleanShot 2023-10-25 at
+16.11.42@2x.png]] ![[CleanShot 2023-10-25 at 16.11.56@2x.png]] ![[CleanShot
+2023-10-25 at 16.12.11@2x.png]] ![[CleanShot 2023-10-25 at 16.12.25@2x.png]]
+![[CleanShot 2023-10-25 at 22.44.57.png]]
+[Youtube](https://www.youtube.com/watch?v=GcgfSJAaBto)
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Hierarchical Clustering.md b/SI/Resource/Fundamentals of Data Mining/Content/Hierarchical Clustering.md
new file mode 100644
index 0000000..096fe2c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Hierarchical Clustering.md
@@ -0,0 +1,54 @@
+---
+id: Hierarchical Clustering
+aliases:
+ - Hierarchical Clustering: Basic Concepts
+tags: []
+---
+
+## Hierarchical Clustering: Basic Concepts
+
+- Hierarchical clustering
+ - Generate a clustering hierarchy (drawn as a **dendrogram**)
+ - Not required to specify _K_, the number of clusters
+ - More deterministic
+ - No iterative refinement
+- Two categories of algorithms:
+ - **[[#Agglomerative Clustering Algorithm|Agglomerative]]**: Start with
+ singleton clusters, continuously merge two clusters at a time to build a
+ **bottom-up** hierarchy of clusters
+ - **Divisive**: Start with a huge macro-cluster, split it continuously into
+ two groups, generating a **top-down** hierarchy of clusters
+
+## Dendrogram: Shows How Clusters are Merged
+
+![[CleanShot 2023-10-24 at 22.11.21@2x.png]]
+
+## Strengths of Hierarchical Clustering
+
+- Do not have to assume any particular number of clusters
+ - Any desired number of clusters can be obtained by 'cutting' the dendrogram
+ at the proper level
+- They may correspond to meaningful taxonomies
+ - Example in biological sciences (e.g., animal kingdom, phylogeny
+ reconstruction, ...)
+
+## Agglomerative Clustering Algorithm
+
+![[CleanShot 2023-10-24 at 22.14.30@2x.png]] ![[CleanShot 2023-10-24 at
+22.14.45@2x.png]] ![[CleanShot 2023-10-24 at 22.15.07@2x.png]] ![[CleanShot
+2023-10-24 at 22.15.23@2x.png]]
+
+## Extensions to Hierarchical Clustering
+
+- Major weaknesses of hierarchical clustering methods
+ - Can never undo what was done previously
+ - Do not scale well
+ - Time complexity of at least $O(n^2)$, where $n$ is the number of total
+ objects
+- Other hierarchical clustering algorithms
+ - BIRCH (1996): Use CF-tree and incrementally adjust the quality of
+ sub-clusters
+ - CURE (1998): Represent a cluster using a set of well-scattered
+ representative points
+ - CHAMELEON (1999): Use graph partitioning methods on the K-nearest neighbor
+ graph of the data
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/K-Means.md b/SI/Resource/Fundamentals of Data Mining/Content/K-Means.md
new file mode 100644
index 0000000..d61d82b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/K-Means.md
@@ -0,0 +1,23 @@
+---
+id: K-Means
+aliases: []
+tags:
+ - Clustering-Algorithms
+ - Compare-and-Contrast
+---
+
+- K-Means [(Youtube)](https://www.youtube.com/watch?v=KzJORp8bgqs)
+ - Each cluster is represented by the center/centroid of the cluster
+- Given K, the number of clusters, the _K-Means_ clustering algorithm is
+ outlined as follows
+ - Select _**K**_ points as initial centroids
+ - **Repeat**
+ - Form _K_ clusters by assigning each point to its **closest** centroid
+ - Re-compute the centroid (i.e., _**mean point**_) of each cluster
+ - **Until** convergence criterion is satisfied (**e.g., no change of cluster
+ membership, or a certain # of iterations have been reached, or, the [[SSE]]
+ is < a pre-defined threshold**)
+- Different kinds of distance measures can be used
+ - [[Manhattan distance]] ($L_1$ norm), [[Euclidean distance]] ($L_2$ norm),
+ [[Cosine similarity]], [[Mahalanobis distance]] ![[CleanShot 2023-10-24 at
+15.34.07@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/K-Medians.md b/SI/Resource/Fundamentals of Data Mining/Content/K-Medians.md
new file mode 100644
index 0000000..91614d3
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/K-Medians.md
@@ -0,0 +1,25 @@
+---
+id: K-Medians
+aliases:
+ - *K-Medians*: Handling Outliers by Computing Medians [(Youtube)]()
+tags: []
+---
+
+## _K-Medians_: Handling Outliers by Computing Medians [(Youtube)]()
+
+- Medians are less sensitive to outliers than means
+ - Think of the median salary vs. mean salary of a large firm when adding a few
+ top executives!
+- _**K-Medians**_: Instead of taking the **mean** value of the object in a
+ cluster as a reference point, **medians** are used ($L_1$-norm is often used
+ as the distance measure)
+- The criterion function for the _K-Medians_ algorithm: $$S =
+ \sum_{k=1}^{K}\sum_{x_i \in C_k}|x_{ij} - med_{kj}|$$
+- The _K-Medians_ clustering algorithm:
+ - Select _K_ points as the initial representative objects (i.e., as initial _K
+ medians_)
+ - **Repeat**
+ - Assign every point to its nearest median
+ - Re-compute the median using the median of <u>==each individual
+ feature==</u>
+ - **Until** convergence criterion is satisfied
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/K-Medoids.md b/SI/Resource/Fundamentals of Data Mining/Content/K-Medoids.md
new file mode 100644
index 0000000..6dc59fe
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/K-Medoids.md
@@ -0,0 +1,45 @@
+---
+id: K-Medoids
+aliases:
+ - Handling Outliers: From _K-Means_ to _K-Medoids_ (Youtube)(Youtube-E.g.)
+tags:
+ - Compare-and-Contrast
+ - ""
+ - Complexity
+---
+
+## Handling Outliers: From _K-Means_ to _K-Medoids_ [(Youtube)](https://www.youtube.com/watch?v=OFELCn-6r2o) [(Youtube-E.g.)](https://www.youtube.com/watch?v=ChBxx4aR-bY&t=0s)
+
+- K-Medoids: Instead of taking the **mean** value of the objects in a cluster as
+ a reference point, **medoids** can be used, which is the **most centrally
+ located** object in a cluster
+
+- The _K-Medoids_ clustering algorithm:
+ - Select _K_ points as the initial ==representative== objects (i.e., as
+ initial _K medoids_)
+ - **Repeat**
+ - Assign each point to the cluster with the closest medoid
+ - Randomly select a ==non-representative== object $o_i$
+ - Compute ==the total cost _S_== of swapping the medoid $m$ with
+ $o_i$ (_==e.g., use SSE to measure==_)
+ - If $S<0$ (_==e.g., new SSE < previous SSE==_), then swap $m$ with $o_i$ to
+ form the new set of medoids
+ - **Until** convergence criterion is satisfied
+
+### Discussion on _K-Medoids_ Clustering
+
+- _K-Medoids_ Clustering: Find _representative_ objects (<u>medoids</u>) in
+ clusters
+- _PAM_ (Partitioning Around Medoids)
+ - Starts from an initial set of medoids, and
+ - Iteratively replaces one of the medoids by one of the non-medoids if it
+ improves the total sum of the squared errors (SSE) of the resulting
+ clustering
+ - _PAM_ works effectively for small data sets but does not scale well for
+ large data sets (due to the computational complexity)
+ - Computational [[Complexity]]: PAM: $O(K(n - K)^2)$ (quite expensive!)
+- Efficiency improvements on PAM
+ - _**CLARA**_ (Kaufmann & Rousseeuw, 1990):
+ - PAM on samples; $O(Ks^2 + K(n - K))$, $s$ is the sample size
+ - _**CLARANS**_ (Ng & Han, 1994): ==Randomised re-sampling==, ensuring
+ efficiency + quality
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/K-Modes.md b/SI/Resource/Fundamentals of Data Mining/Content/K-Modes.md
new file mode 100644
index 0000000..6dea96c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/K-Modes.md
@@ -0,0 +1,27 @@
+---
+id: K-Modes
+aliases:
+ - K-Modes: Clustering Categorical Data (Youtube)
+tags: []
+---
+
+## K-Modes: Clustering Categorical Data [(Youtube)](https://www.youtube.com/watch?v=b39_vipRkUo)
+
+- _K-Means_ cannot directly handle non-numerical (categorical) data - ==how to
+ calculate the mean? What do they mean?==
+ - Mapping categorical value to 0/1 cannot generate quality clusters (in
+ high-dimensional space)
+- _**K-Modes**_: An extension to _K-Means_ by replacing means of clusters with
+ _**modes**_
+ - Mode: The value that appears the most often in a **set** of data values
+- <u>Dissimilarity</u> measure between object X and the center of a cluster
+ $Z_l$
+ - $\Phi(x_j, z_j) = 1 - \dfrac{n_j^r}{n_l}$ when $x_j = z_j = r$; 1 when
+ $x_j \ne z_j$
+ - where $z_j$ is the categorical value of attribute j in $Z_l$, $n_l$ is the
+ number of objects in cluster $l$, and $n_j^r$ is the number of objects
+ whose attribute value is r
+- This dissimilarity measure (distance function) is _**frequency-based**_
+- Algorithm is still based on iterative _object_ cluster assignment and
+ _centroid_ update
+- A mixture of categorical and numerical data: Using a _**K-Prototype**_ method
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/K-means++.md b/SI/Resource/Fundamentals of Data Mining/Content/K-means++.md
new file mode 100644
index 0000000..5cf1343
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/K-means++.md
@@ -0,0 +1,11 @@
+---
+id: K-means++
+aliases: []
+tags: []
+---
+
+- The first centroid is selected at random
+- The next centroid selected is the one that is the farthest from the currently
+ selected (selection is based on a weighted probability score)
+- The selection continues until _K_ centroids are obtained
+- [Youtube](https://www.youtube.com/watch?v=z2yncM2HE6M)
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Kernel K-Means.md b/SI/Resource/Fundamentals of Data Mining/Content/Kernel K-Means.md
new file mode 100644
index 0000000..828a053
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Kernel K-Means.md
@@ -0,0 +1,39 @@
+---
+id: Kernel K-Means
+aliases:
+ - *Kernel K-Means Clustering*
+tags: []
+---
+
+## _Kernel K-Means Clustering_
+
+- _Kernel K-Means_ can be used to detect non-convex clusters
+ - A region is **convex** if it contains all the line segments connecting any
+ pair of its points. Otherwise, it is **concave**
+ - _K-Means_ can only detect clusters that are **linearly** separable
+- <u>Idea</u>: Project data onto the high-dimensional kernel space, and then
+ perform _K-Means_ clustering
+ - Map data points in the input space onto a high-dimensional feature space
+ using the kernel function ![[CleanShot 2023-10-24 at 21.42.19@2x.png]]
+ - Perform _K-Means_ on the mapped feature space
+- Computational complexity (time and space) is higher than K-Means
+ - Need to compute and store _n x n_ kernel matrix generated from the kernel
+ function on the original data, where _n_ is the number of points
+
+## Kernel Functions and Kernel K-Means Clustering
+
+- Typical kernel functions:
+ - Polynomial kernel of degree h: $K(X_i, X_j) = (X_i \cdot X_j + 1)^h$
+ - <u>Gaussian radial basis function (RBF) kernel</u>: $K(X_i, X_j) =
+ e^{-||X_i - X_j||^2 / 2\sigma^2}$
+ - Sigmoid kernel: $K(X_i, X_j) = \tanh(\kappa X_i \cdot X_j - \delta)$
+- The formula for kernel matrix K for any two points $x_i, x_j \in C_k$ is
+ $K_{x_ix_j} = \phi(x_i) \cdot \phi(x_j)$
+- The [[SSE]] criterion of _kernel K-means_: $$SSE(c) =
+ \sum_{k=1}^{K}\sum_{x_i\in{C_k}}||\phi(x_i) - c_k||^2$$
+ - The formula for the cluster centroid: $$c_k =
+ \dfrac{\sum_{x_i\in{C_k}}\phi(x_i)}{|C_k|}$$
+- Clustering can be performed without the actual individual projections
+ $\phi(x_i)$ and $\phi(x_j)$ for the data points $x_i, x_j \in{C_k}$ (use
+ $K(x_i, x_j)$ instead) ![[CleanShot 2023-10-24 at 22.06.43@2x.png]] ![[CleanShot
+2023-10-24 at 22.07.05@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/NMI.md b/SI/Resource/Fundamentals of Data Mining/Content/NMI.md
new file mode 100644
index 0000000..d886599
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/NMI.md
@@ -0,0 +1,21 @@
+---
+id: NMI
+aliases:
+ - Normalized mutual information (NMI)
+tags: []
+---
+
+## Normalized mutual information (NMI)
+
+- Mutual information:
+ - Quantifies the amount of shared info between the clustering $C$ and the
+ ground truth $T$: $I(C,T) = \sum_{i=1}^{r}\sum_{j=1}^{k}p_{ij}\log\dfrac{p_{ij}}{p_{C_i}p_{T_j}}$
+ - Measures the dependency between the observed joint probability $p_{ij}$ of
+ $C$ and $T$, and the expected joint probability $p_{C_i} \cdot p_{T_j}$ under the
+ independence assumption
+ - When $C$ and $T$ are independent, $p_{ij} = p_{C_i} \cdot p_{T_j}$ and $I(C, T) = 0$.
+ However, there is no upper bound on the mutual information
+- **Normalized mutual information (NMI)** $$N M I(C, T) =
+ \sqrt{\dfrac{I(C,T)}{H(C)}*\dfrac{I(C, T)}{H(T)}} = \dfrac{I(C,
+ T)}{\sqrt{H(C) * H(T)}}$$
+ - Value range of NMI: [0, 1]. Value close to 1 indicates a good clustering
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Nominal.md b/SI/Resource/Fundamentals of Data Mining/Content/Nominal.md
new file mode 100644
index 0000000..e4f447f
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Nominal.md
@@ -0,0 +1,9 @@
+---
+id: Nominal
+aliases: []
+tags: []
+---
+
+- Proximity Measure for Categorical Attributes
+ ![[CleanShot 2023-10-23 at 18.20.19@2x.png]]
+- [[Target encoding]] for Multi-Class Classification
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/OPTICS.md b/SI/Resource/Fundamentals of Data Mining/Content/OPTICS.md
new file mode 100644
index 0000000..3578883
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/OPTICS.md
@@ -0,0 +1,35 @@
+---
+id: OPTICS
+aliases:
+ - "OPTICS: Ordering Points To Identify Clustering Structure"
+tags: []
+---
+
+## OPTICS: Ordering Points To Identify Clustering Structure
+
+![[CleanShot 2023-10-24 at 22.22.49@2x.png]]
+![[CleanShot 2023-10-24 at 22.23.05@2x.png]]
+
+## OPTICS (cont.)
+
+- OPTICS does not explicitly produce a data set clustering.
+- It outputs a cluster ordering.
+ - It is a linear list of all objects under analysis and
+ - Represents the density-based clustering structure of the data.
+- Objects in a denser cluster are listed closer to each other in the cluster
+ ordering
+- Ordering is equivalent to density-based clustering obtained from a wide range
+ of parameter settings.
+- Thus OPTICS does not require the user to provide a specific density threshold.
+- The cluster ordering can be used to extract basic clustering information
+ (e.g., cluster centers, or arbitrary-shaped clusters), derive the intrinsic
+ clustering structure, as well as provide a visualization of the clustering.
+- It computes an ordering of all objects in a given database. And
+- It stores the core-distance and a suitable reachability-distance for **each**
+ object in the database.
+- OPTICS maintains a list called **OrderSeeds** to help generate the output
+ ordering.
+- Objects in **OrderSeeds**
+ - Are sorted by the reachability-distance from their respective closest core
+ objects,
+ - That is, by the smallest reachability-distance of each object.
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Ordinal.md b/SI/Resource/Fundamentals of Data Mining/Content/Ordinal.md
new file mode 100644
index 0000000..e5bf256
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Ordinal.md
@@ -0,0 +1,7 @@
+---
+id: Ordinal
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-23 at 18.28.55@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/SSE.md b/SI/Resource/Fundamentals of Data Mining/Content/SSE.md
new file mode 100644
index 0000000..007fcb7
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/SSE.md
@@ -0,0 +1,23 @@
+---
+id: SSE
+aliases:
+ - "Partitioning Algorithms: Basic Concepts"
+tags: []
+---
+
+## Partitioning Algorithms: Basic Concepts
+
+- <u>Partitioning method</u>: Discovering the groupings in the data by
+ optimizing a specific ==objective function== and ==iteratively== improving the
+ quality of partitions
+- _K-partitioning_ method: Partitioning a dataset _**D**_ of _**n**_ objects
+ into a set of _**K**_ clusters so that an objective function is optimized
+ (e.g., the sum of squared distances is minimized within each cluster, where
+ $c_k$ is the centroid or medoid of cluster $C_k$)
+ - A typical objective function: **Sum of Squared Errors (SSE)** $$ SSE(C) =
+ \sum_{k=1}^{K}\sum_{x_i\in{C_k}}||x_i - c_k||^2$$
+- **Problem definition**: Given _K_, find a partition of _K clusters_ that
+ optimizes the chosen partitioning criterion
+ - Global optimal: Needs to exhaustively enumerate all partitions
+ - Heuristic methods (i.e., greedy algorithms): _[[K-Means]], [[K-Medians]],
+ [[K-Medoids]], etc_
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Target encoding.md b/SI/Resource/Fundamentals of Data Mining/Content/Target encoding.md
new file mode 100644
index 0000000..0502447
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Target encoding.md
@@ -0,0 +1,8 @@
+---
+id: Target encoding
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-23 at 18.20.36@2x.png]] ![[CleanShot 2023-10-23 at 18.22.00@2x.png]]
+![[CleanShot 2023-10-23 at 18.22.12@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/Z-score.md b/SI/Resource/Fundamentals of Data Mining/Content/Z-score.md
new file mode 100644
index 0000000..34730a6
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/Z-score.md
@@ -0,0 +1,20 @@
+---
+id: Z-score
+aliases:
+ - Z-score - An example
+tags: []
+---
+
+### Z-score - An example
+
+- John gets a mark of 64 in a physics test, where the mean is 50 and the
+ standard deviation is 8.
+- Jane gets a mark of 74 in a chemistry test, where the mean is 58 and the
+ standard deviation is 10
+
+Who has a better class performance?
+
+- John's z = (64 - 50) / 8 = 1.75
+- Jane's z = (74 - 58) / 10 = 1.6
+- Although Jane's score is higher, John's score is further above the mean, and
+ it might be concluded that John has achieved greater success.
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/attributes.md b/SI/Resource/Fundamentals of Data Mining/Content/attributes.md
new file mode 100644
index 0000000..5b03e85
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/attributes.md
@@ -0,0 +1,14 @@
+---
+id: attributes
+aliases: []
+tags: []
+---
+
+- Attribute (or dimensions, features, variables)
+ - A data field, representing a characteristic or feature of a data object
+ - E.g., customer_ID, name, address
+- Types:
+ - [[Nominal]](e.g., red, blue)
+ - [[Binary]](e.g., {true, false})
+ - [[Ordinal]](e.g., {freshman, sophomore, junior, senior})
+ - [[Numeric]]; [[quantitative]] ([[discrete]] vs [[continuous]])
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/clustering algorithms.md b/SI/Resource/Fundamentals of Data Mining/Content/clustering algorithms.md
new file mode 100644
index 0000000..ad1b29b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/clustering algorithms.md
@@ -0,0 +1,21 @@
+---
+id: clustering algorithms
+aliases:
+ - Compare and Contrast
+tags:
+ - Clustering-Algorithms
+---
+
+## Compare and Contrast
+
+### _[[K-Means]]_
+
+### _[[K-means++]]_
+
+### _[[K-Medoids]]_
+
+### _[[K-Medians]]_
+
+### _[[K-Modes]]_
+
+### _[[Kernel K-Means]]_
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/clustering evaluation.md b/SI/Resource/Fundamentals of Data Mining/Content/clustering evaluation.md
new file mode 100644
index 0000000..2e175e9
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/clustering evaluation.md
@@ -0,0 +1,9 @@
+---
+id: clustering evaluation
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-25 at 16.48.09@2x.png]] ![[CleanShot 2023-10-25 at 16.48.19@2x.png]]
+![[CleanShot 2023-10-25 at 17.08.12@2x.png]]
+![[CleanShot 2023-10-25 at 17.08.29@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/external.md b/SI/Resource/Fundamentals of Data Mining/Content/external.md
new file mode 100644
index 0000000..91faabe
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/external.md
@@ -0,0 +1,42 @@
+---
+id: external
+aliases:
+ - "Measuring Clustering Quality: ==External== Methods"
+tags: []
+---
+
+## Measuring Clustering Quality: ==External== Methods
+
+- Given the **ground truth** _T_, _Q(C, T)_ is the **quality measure** for a
+ clustering _C_
+- _Q(C, T)_ is good if it satisfies the following **four** essential criteria
+ - **Cluster homogeneity**
+ - The purer, the better
+ - **Cluster completeness**
+ - Assign objects belonging to the same category in the ground truth to the
+ same cluster
+ - **Rag bag better than alien**
+ - Putting a heterogeneous object into a pure cluster should be penalized
+ **more** than putting it into a _rag bag_ (i.e., "miscellaneous" or
+ "other" category)
+ - **Small cluster preservation**
+ - Splitting a small category into pieces is more harmful than splitting a
+ large category into pieces
+
+## Commonly Used External Measures
+
+- **Matching-based measure**
+ - Purity, maximum matching, [[F-measure]]
+- **Entropy-Based Measures**
+ - Conditional entropy
+ - <u>Normalized mutual information (NMI)</u>
+ - Variation of information
+- **Pairwise measures**
+ - Four possibilities: True positive (TP), FN, FP, TN
+ - Jaccard coefficient, Rand statistic, Fowlkes-Mallows measure
+- **Correlation measures**
+ - Discretized Hubert statistic, normalized discretized Hubert statistic
+- Purity vs Maximum Matching ![[CleanShot 2023-10-25 at 15.57.30@2x.png]]
+- [[F-measure]] ![[CleanShot 2023-10-25 at 15.57.51@2x.png]]
+ ![[CleanShot 2023-10-25 at 15.58.04@2x.png]] ![[CleanShot 2023-10-25 at 15.58.19@2x.png]]
+ ![[CleanShot 2023-10-25 at 15.58.40@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/internal.md b/SI/Resource/Fundamentals of Data Mining/Content/internal.md
new file mode 100644
index 0000000..22834ec
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/internal.md
@@ -0,0 +1,8 @@
+---
+id: internal
+aliases: []
+tags: []
+---
+
+![[CleanShot 2023-10-25 at 15.59.49@2x.png]]
+![[CleanShot 2023-10-25 at 16.00.03@2x.png]]
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/mixted types of attributes.md b/SI/Resource/Fundamentals of Data Mining/Content/mixted types of attributes.md
new file mode 100644
index 0000000..8b8cb77
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/mixted types of attributes.md
@@ -0,0 +1,21 @@
+---
+id: mixted types of attributes
+aliases:
+ - Attributes of Mixed Types
+tags: []
+---
+
+### Attributes of Mixed Types
+
+- A dataset may contain all different types
+ - [[Nominal]], symmetric [[binary]], asymmetric [[binary]],
+ [[Distance functions|numeric]], and [[ordinal]]
+- One may use a weighted formula to combine their effects: $$d(i, j) =
+ \dfrac{\Sigma_{f=1}^{p}w_{ij}^{(f)}d_{ij}^{(f)}}{\Sigma_{f=1}^{p}w_{ij}^{(f)}}$$
+ - if _f_ is numeric: use the **normalized distance (e.g., min-max distance
+ [0-1])**
+- If _f_ is binary or nominal: $d_{ij}^{(f)}=0$ if $x_{if} = x_{jf}$; or
+ $d_{ij}^{(f)} = 1$ otherwise (there are other options)
+- If _f_ is ordinal
+ - Compute ranks $z_{if}$ where $z_{if} = \dfrac{r_{if} - 1}{M_f - 1}$
+ - Treat $z_{if}$ as numeric
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/pattern discovery.md b/SI/Resource/Fundamentals of Data Mining/Content/pattern discovery.md
new file mode 100644
index 0000000..f88fe80
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/pattern discovery.md
@@ -0,0 +1,27 @@
+---
+id: pattern discovery
+aliases:
+ - What is Pattern Discovery?
+tags: []
+---
+
+## What is Pattern Discovery?
+
+- ==What are patterns?==
+ - ==Patterns==: A set of items, subsequences, or substructures that occur
+ frequently together (or strongly correlated) in a data set
+ - Patterns represent ==intrinsic== and ==important properties== of datasets
+- ==Pattern discovery==: Uncovering patterns from massive data sets
+- Motivation examples:
+ - What products were often purchased together?
+ - What are the subsequent purchases after buying an iPad?
+ - What code segments likely contain copy-and-paste bugs?
+ - What word sequences likely form phrases in this corpus?
+ ![[CleanShot 2023-10-26 at 01.53.56@2x.png]] ![[CleanShot 2023-10-26 at 01.54.32@2x.png]]
+ ![[CleanShot 2023-10-26 at 01.54.44@2x.png]]
+ ![[CleanShot 2023-10-26 at 01.55.00@2x.png]]
+
+## Efficient Pattern Mining Methods
+
+- The [[Apriori]] Algorithm
+- [[FP-Growth]]: A Frequent Pattern-Growth Approach
diff --git a/SI/Resource/Fundamentals of Data Mining/Content/variants.md b/SI/Resource/Fundamentals of Data Mining/Content/variants.md
new file mode 100644
index 0000000..767e696
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Content/variants.md
@@ -0,0 +1,16 @@
+---
+id: variants
+aliases:
+ - Variations of _K-Means_
+tags: []
+---
+
+# Variations of _K-Means_
+
+- There are many variants of the _K-Means_ method, varying in different aspects
+ - Choosing better initial centroid estimates
+ - _[[K-means++]]_, _Intelligent K-Means_, _Genetic K-Means_
+ - Choosing different representative prototypes for the clusters
+ - _[[K-Medoids]]_, _[[K-Medians]]_, _[[K-Modes]]_
+ - Applying feature transformation techniques
+ - _Weighted K-Means_, _[[Kernel K-Means]]_
diff --git a/SI/Resource/Fundamentals of Data Mining/Midterm - CS663.md b/SI/Resource/Fundamentals of Data Mining/Midterm - CS663.md
new file mode 100644
index 0000000..358ed89
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Midterm - CS663.md
@@ -0,0 +1,69 @@
+---
+id: Midterm - CS663
+aliases:
+ - Review
+tags: []
+---
+# Review
+## Types of Questions
+- True or false
+- Multi-choice
+- Explain (e.g., [[K-Means]], [[NMI]])
+- [[Compare and Contrast]] (e.g., [[clustering algorithms]])
+- Computational questions (e.g., [[DBSCAN]] and [[OPTICS]] (similar to your
+ assignment questions), [[FP-growth|fp-tree]] and [[pattern discovery]]
+ (examples from the lecture), [[clustering evaluation]] )
+
+## Subjects
+- [[Distance measures (mixted types of attributes)]]
+ - How to handle [[nominal]] attributes …
+ - [[nominal|Match or no-match]](as a whole or individually)
+ - [[nominal|One-hot encoding]]
+ - [[Target encoding]]
+- Normalization ([[z-score]], [[mixted types of attributes|min-max]], …)
+- Clustering techniques:
+ - [[K-Means]] and its [[variants]]
+ - [[Hierarchical Clustering]] ([[Hierarchical Clustering|Agglomerative]])
+ - [[Density-based Clustering]]([[DBSCAN]], [[OPTICS]])
+ - [[Complexity]], [[distance functions]]
+- How to measure clustering quality ([[internal]] and [[external]] measures,
+ [[F-measure]] and its averaging/combining options when applied to multiple
+ classes/clusters)
+- Frequent pattern mining ([[Apriori]] Algorithm, [[FP-growth]])
+
+---
+
+## [[Data Matrix and Dissimilarity Matrix]]
+
+- Data matrix
+ - A data matrix of _n_ data points with _l_ dimensions
+ ![[CleanShot 2023-10-23 at 17.37.59@2x.png]]
+- Dissimilarity (distance) matrix (n by n)
+ - n data points, but registers only the distance _d(i,j)_ (typically
+ metric) ![[CleanShot 2023-10-23 at 17.41.47@2x.png]]
+ - Usually symmetric, thus a triangular matrix
+ - **[[Distance functions]]** are usually different for real, boolean,
+ categorical, ordinal, ratio, and vector variables
+ - Weights can be associated with different variables based on applications and
+ data semantics
+
+### Standardizing Numeric Data
+
+- [[Z-score]]: $z = \dfrac{x - \mu}{\sigma}$
+ - $x$: raw score to be standardized, $\mu$: mean of the population, $\sigma$:
+ standard deviation
+ - the distance between the raw score and the population mean in units of the
+ standard deviation
+ - negative when the raw score is below the mean, positive when above
+- An alternative way: Calculate the mean absolute deviation $S_{f} =
+ \dfrac{1}{n}(|x_{1f} - m_f| + |x_{2f} - m_f| + ... + |x_{nf} - m_f|)$ where
+ $m_f = \dfrac{1}{n}(x_{1f} + x_{2f} + ... + x_{nf})$
+ - standardized measure (z-score): $z_{if} = \dfrac{x_{if} - m_f}{S_f}$
+- **Using mean absolute deviation is more robust than using standard
+ deviation**
+
+### Proximity Measure for [[Binary|Binary Attributes]]
+
+### Proximity Measure for [[nominal|Categorical Attributes]]
+
+### [[Ordinal]] Variables
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.37.59@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.37.59@2x.png
new file mode 100644
index 0000000..3696a4c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.37.59@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.41.47@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.41.47@2x.png
new file mode 100644
index 0000000..c3b4196
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 17.41.47@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.09.20@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.09.20@2x.png
new file mode 100644
index 0000000..69ae2b8
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.09.20@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.16.28@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.16.28@2x.png
new file mode 100644
index 0000000..3dc651a
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.16.28@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.17.24@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.17.24@2x.png
new file mode 100644
index 0000000..c7af4ad
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.17.24@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.18.06@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.18.06@2x.png
new file mode 100644
index 0000000..06fdfcf
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.18.06@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.11@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.11@2x.png
new file mode 100644
index 0000000..15cbe36
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.11@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.54@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.54@2x.png
new file mode 100644
index 0000000..50b7e7b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.19.54@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.19@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.19@2x.png
new file mode 100644
index 0000000..18e9111
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.19@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.36@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.36@2x.png
new file mode 100644
index 0000000..34a9fca
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.20.36@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.00@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.00@2x.png
new file mode 100644
index 0000000..11c66f2
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.00@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.12@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.12@2x.png
new file mode 100644
index 0000000..0cac265
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.22.12@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.28.55@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.28.55@2x.png
new file mode 100644
index 0000000..16a9b08
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.28.55@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.43.35@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.43.35@2x.png
new file mode 100644
index 0000000..1fff0f3
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-23 at 18.43.35@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 15.34.07@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 15.34.07@2x.png
new file mode 100644
index 0000000..1ac50c5
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 15.34.07@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 21.42.19@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 21.42.19@2x.png
new file mode 100644
index 0000000..88cdb81
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 21.42.19@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.06.43@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.06.43@2x.png
new file mode 100644
index 0000000..afe6414
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.06.43@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.07.05@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.07.05@2x.png
new file mode 100644
index 0000000..165004d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.07.05@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.11.21@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.11.21@2x.png
new file mode 100644
index 0000000..c039b8b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.11.21@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.30@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.30@2x.png
new file mode 100644
index 0000000..94d41cf
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.30@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.45@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.45@2x.png
new file mode 100644
index 0000000..2b934c4
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.14.45@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.07@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.07@2x.png
new file mode 100644
index 0000000..df28ef3
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.07@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.23@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.23@2x.png
new file mode 100644
index 0000000..396166d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.15.23@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.02@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.02@2x.png
new file mode 100644
index 0000000..f7a8993
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.02@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.23@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.23@2x.png
new file mode 100644
index 0000000..33c153c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.23@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.37@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.37@2x.png
new file mode 100644
index 0000000..65ccece
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.37@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.59@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.59@2x.png
new file mode 100644
index 0000000..f7b18fa
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.21.59@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.22.49@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.22.49@2x.png
new file mode 100644
index 0000000..9a8c1e5
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.22.49@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.23.05@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.23.05@2x.png
new file mode 100644
index 0000000..7b968ff
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-24 at 22.23.05@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.30@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.30@2x.png
new file mode 100644
index 0000000..688beff
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.30@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.51@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.51@2x.png
new file mode 100644
index 0000000..773775d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.57.51@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x 1.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x 1.png
new file mode 100644
index 0000000..c1387ec
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x 1.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x.png
new file mode 100644
index 0000000..c1387ec
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.04@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.19@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.19@2x.png
new file mode 100644
index 0000000..9310051
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.19@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.40@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.40@2x.png
new file mode 100644
index 0000000..89ca276
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.58.40@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.59.49@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.59.49@2x.png
new file mode 100644
index 0000000..5c66d6d
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 15.59.49@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.00.03@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.00.03@2x.png
new file mode 100644
index 0000000..f86fd7e
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.00.03@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.08.52@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.08.52@2x.png
new file mode 100644
index 0000000..7a80a70
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.08.52@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.04@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.04@2x.png
new file mode 100644
index 0000000..ebce74a
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.04@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.16@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.16@2x.png
new file mode 100644
index 0000000..ee8f4e7
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.09.16@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.06@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.06@2x.png
new file mode 100644
index 0000000..3d22627
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.06@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.19@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.19@2x.png
new file mode 100644
index 0000000..f75a7dc
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.19@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.34@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.34@2x.png
new file mode 100644
index 0000000..17df89c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.10.34@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.31@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.31@2x.png
new file mode 100644
index 0000000..4f5411b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.31@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.42@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.42@2x.png
new file mode 100644
index 0000000..18c4973
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.42@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.56@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.56@2x.png
new file mode 100644
index 0000000..bc850a5
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.11.56@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.11@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.11@2x.png
new file mode 100644
index 0000000..0b8801a
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.11@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.25@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.25@2x.png
new file mode 100644
index 0000000..98bd7e1
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.12.25@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.09@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.09@2x.png
new file mode 100644
index 0000000..56fe16c
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.09@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.19@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.19@2x.png
new file mode 100644
index 0000000..156f250
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 16.48.19@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.12@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.12@2x.png
new file mode 100644
index 0000000..043a4e2
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.12@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.29@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.29@2x.png
new file mode 100644
index 0000000..ebe9790
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.29@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.47@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.47@2x.png
new file mode 100644
index 0000000..6eeb3e8
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 17.08.47@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 22.44.57.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 22.44.57.png
new file mode 100644
index 0000000..23c94d4
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-25 at 22.44.57.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.53.56@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.53.56@2x.png
new file mode 100644
index 0000000..ffe51dc
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.53.56@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.32@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.32@2x.png
new file mode 100644
index 0000000..c123219
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.32@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.44@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.44@2x.png
new file mode 100644
index 0000000..222267b
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.54.44@2x.png
Binary files differ
diff --git a/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.55.00@2x.png b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.55.00@2x.png
new file mode 100644
index 0000000..b694ba0
--- /dev/null
+++ b/SI/Resource/Fundamentals of Data Mining/Screenshots/CleanShot 2023-10-26 at 01.55.00@2x.png
Binary files differ