{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f21ef409",
   "metadata": {},
   "source": [
    "# Exploration of form-based signatures\n",
    "\n",
    "This notebook explores form-based signatures, trying to understand what the individual clusters represent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8d2a1075",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import dask.dataframe\n",
    "import matplotlib.pyplot as plt\n",
    "import urbangrammar_graphics as ugg\n",
    "\n",
    "from matplotlib.lines import Line2D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4212003a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 105 ms, sys: 138 ms, total: 244 ms\n",
      "Wall time: 290 ms\n",
      "CPU times: user 51.5 s, sys: 40.5 s, total: 1min 32s\n",
      "Wall time: 1min 50s\n"
     ]
    }
   ],
   "source": [
    "# Step 1: open the standardized parquet dataset with dask; +/-inf become NaN and\n",
    "# every NaN (pre-existing ones included) is then filled with 0.\n",
    "# Step 2: materialise the result in memory and index it by `hindex` (the slow step).\n",
    "%time data = dask.dataframe.read_parquet(\"../../urbangrammar_samba/spatial_signatures/clustering_data/form/standardized/\").replace([np.inf, -np.inf], np.nan).fillna(0)\n",
    "%time data = data.compute().set_index('hindex')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "67ae292d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First-level labels (column `k8`) ...\n",
    "labels_l1 = pd.read_parquet(\"../../urbangrammar_samba/spatial_signatures/clustering_data/k8_form_labels.pq\")\n",
    "# ... and the two clustergram label tables that the next cell uses to subdivide\n",
    "# first-level clusters 4 and 2.\n",
    "labels_l2_4 = pd.read_parquet(\"../../urbangrammar_samba/spatial_signatures/clustering_data/clustergram_c4_form_labels.pq\")\n",
    "labels_l2_2 = pd.read_parquet(\"../../urbangrammar_samba/spatial_signatures/clustering_data/clustergram_c2_form_labels.pq\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d68f37fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace first-level clusters 4 and 2 by their second-level labels, shifted by\n",
    "# ten times the parent id (+40 and +20) so the parent stays visible in the code.\n",
    "# `.values` drops the index: rows are matched by position, not by `hindex`.\n",
    "# NOTE(review): assumes each clustergram table is stored in the same row order as\n",
    "# the rows of its parent cluster in `labels_l1` -- confirm.\n",
    "labels = labels_l1.copy()\n",
    "for parent, sub_labels, level in ((4, labels_l2_4, '9'), (2, labels_l2_2, '8')):\n",
    "    in_parent = labels.k8 == parent\n",
    "    labels.loc[in_parent, 'k8'] = sub_labels[level].values + parent * 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e0ff2ebc",
   "metadata": {},
"outputs": [ { "data": { "text/html": [ "
\n", " | k8 | \n", "
---|---|
hindex | \n", "\n", " |
c000e094707t0000 | \n", "0 | \n", "
c000e094763t0000 | \n", "5 | \n", "
c000e094763t0001 | \n", "5 | \n", "
c000e094763t0002 | \n", "5 | \n", "
c000e094764t0000 | \n", "5 | \n", "
... | \n", "... | \n", "
c102e644989t0111 | \n", "5 | \n", "
c102e644989t0112 | \n", "5 | \n", "
c102e644989t0113 | \n", "5 | \n", "
c102e644989t0114 | \n", "5 | \n", "
c102e644989t0115 | \n", "5 | \n", "
14539578 rows × 1 columns
\n", "\n", " | sicCAR_q2 | \n", "lcdMes_q2 | \n", "sdcLAL_q2 | \n", "stbCeA_q1 | \n", "stcOri_q2 | \n", "linPDE_q2 | \n", "ssbCor_q3 | \n", "sdbAre_q3 | \n", "sddAre_q2 | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "0.326064 | \n", "0.277856 | \n", "0.240502 | \n", "0.706473 | \n", "0.871973 | \n", "0.699971 | \n", "1.119532e-02 | \n", "0.042313 | \n", "0.404416 | \n", "
1 | \n", "0.482047 | \n", "0.362348 | \n", "0.083403 | \n", "0.519900 | \n", "0.829345 | \n", "0.619649 | \n", "1.669432e-02 | \n", "0.067230 | \n", "0.237370 | \n", "
3 | \n", "0.305631 | \n", "0.287905 | \n", "0.312750 | \n", "0.646587 | \n", "0.965733 | \n", "0.764049 | \n", "1.568136e-02 | \n", "0.036948 | \n", "0.454185 | \n", "
5 | \n", "0.413370 | \n", "0.309513 | \n", "0.127579 | \n", "0.610906 | \n", "0.831341 | \n", "0.665396 | \n", "1.290708e-02 | \n", "0.051744 | \n", "0.295283 | \n", "
6 | \n", "0.464284 | \n", "0.000000 | \n", "0.210695 | \n", "0.393272 | \n", "0.000000 | \n", "1.000000 | \n", "3.361835e-18 | \n", "0.177451 | \n", "0.262379 | \n", "
7 | \n", "0.000000 | \n", "0.087568 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.941431 | \n", "1.647299e-16 | \n", "0.065779 | \n", "1.000000 | \n", "
20 | \n", "0.551620 | \n", "0.486684 | \n", "0.062880 | \n", "0.395459 | \n", "0.723952 | \n", "0.525718 | \n", "2.955567e-02 | \n", "0.088063 | \n", "0.148939 | \n", "
21 | \n", "0.565900 | \n", "0.453098 | \n", "0.053280 | \n", "0.394376 | \n", "0.818832 | \n", "0.530562 | \n", "5.949657e-02 | \n", "0.150553 | \n", "0.121038 | \n", "
22 | \n", "0.560256 | \n", "0.442562 | \n", "0.047341 | \n", "0.414214 | \n", "0.744077 | \n", "0.546407 | \n", "2.617269e-02 | \n", "0.108811 | \n", "0.133056 | \n", "
23 | \n", "0.550755 | \n", "0.440602 | \n", "0.053681 | \n", "0.420885 | \n", "0.874504 | \n", "0.549118 | \n", "2.954338e-02 | \n", "0.109329 | \n", "0.137769 | \n", "
24 | \n", "0.575068 | \n", "0.486899 | \n", "0.043510 | \n", "0.377864 | \n", "0.842704 | \n", "0.505271 | \n", "3.586106e-02 | \n", "0.099278 | \n", "0.122654 | \n", "
25 | \n", "0.553745 | \n", "0.464867 | \n", "0.059534 | \n", "0.399977 | \n", "0.840077 | \n", "0.523696 | \n", "3.411659e-02 | \n", "0.121572 | \n", "0.180358 | \n", "
26 | \n", "0.550945 | \n", "0.430609 | \n", "0.049239 | \n", "0.432253 | \n", "0.804648 | \n", "0.550894 | \n", "2.826390e-02 | \n", "0.090354 | \n", "0.134050 | \n", "
27 | \n", "0.650376 | \n", "0.568312 | \n", "0.028627 | \n", "0.296350 | \n", "0.799767 | \n", "0.440345 | \n", "1.847212e-01 | \n", "0.258739 | \n", "0.090640 | \n", "
40 | \n", "0.682158 | \n", "0.652837 | \n", "0.043809 | \n", "0.274658 | \n", "0.797039 | \n", "0.357952 | \n", "3.051706e-01 | \n", "0.386590 | \n", "0.237481 | \n", "
41 | \n", "0.930326 | \n", "1.000000 | \n", "0.021525 | \n", "0.035035 | \n", "0.585487 | \n", "0.039694 | \n", "5.286567e-01 | \n", "0.783033 | \n", "0.007037 | \n", "
42 | \n", "0.706732 | \n", "0.723045 | \n", "0.038523 | \n", "0.245126 | \n", "0.842489 | \n", "0.287014 | \n", "4.257728e-01 | \n", "0.443653 | \n", "0.093988 | \n", "
43 | \n", "0.026294 | \n", "0.323674 | \n", "0.715510 | \n", "0.712914 | \n", "0.998924 | \n", "0.840615 | \n", "0.000000e+00 | \n", "0.015136 | \n", "0.702403 | \n", "
44 | \n", "0.582772 | \n", "0.247936 | \n", "0.000000 | \n", "0.483896 | \n", "0.379204 | \n", "0.664963 | \n", "6.723671e-18 | \n", "0.070398 | \n", "0.029880 | \n", "
45 | \n", "0.761153 | \n", "0.824085 | \n", "0.022645 | \n", "0.168886 | \n", "0.785924 | \n", "0.195078 | \n", "3.679056e-01 | \n", "0.473925 | \n", "0.034220 | \n", "
46 | \n", "1.000000 | \n", "0.970814 | \n", "0.027575 | \n", "0.000000 | \n", "0.782228 | \n", "0.000000 | \n", "1.000000e+00 | \n", "1.000000 | \n", "0.000428 | \n", "
47 | \n", "0.953251 | \n", "0.956072 | \n", "0.029195 | \n", "0.017098 | \n", "0.655929 | \n", "0.021849 | \n", "9.829798e-01 | \n", "0.907551 | \n", "0.000000 | \n", "
48 | \n", "0.219788 | \n", "0.371201 | \n", "0.238190 | \n", "0.658567 | \n", "0.754113 | \n", "0.563664 | \n", "1.008551e-17 | \n", "0.000000 | \n", "0.233776 | \n", "