Skip to content

Commit

Permalink
add graph processing
Browse files Browse the repository at this point in the history
  • Loading branch information
abhayibm committed May 11, 2020
1 parent 0529864 commit ed5a4df
Show file tree
Hide file tree
Showing 2 changed files with 567 additions and 0 deletions.
280 changes: 280 additions & 0 deletions graph/.ipynb_checkpoints/pagerank-python-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql.functions import *\n",
"from pyspark.sql.functions import pandas_udf, PandasUDFType\n",
"from pyspark.sql.types import *\n",
"from graphframes import *\n",
"from pyspark.sql.functions import *"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = SparkSession \\\n",
" .builder \\\n",
" .appName(\"PageRank\") \\\n",
" .config(\"spark.executor.heartbeatInterval\", \"30000s\")\\\n",
" .getOrCreate()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <div>\n",
" <p><b>SparkSession - hive</b></p>\n",
" \n",
" <div>\n",
" <p><b>SparkContext</b></p>\n",
"\n",
" <p><a href=\"http://abhays-mbp-2:4040\">Spark UI</a></p>\n",
"\n",
" <dl>\n",
" <dt>Version</dt>\n",
" <dd><code>v2.4.3</code></dd>\n",
" <dt>Master</dt>\n",
" <dd><code>local[*]</code></dd>\n",
" <dt>AppName</dt>\n",
" <dd><code>PySparkShell</code></dd>\n",
" </dl>\n",
" </div>\n",
" \n",
" </div>\n",
" "
],
"text/plain": [
"<pyspark.sql.session.SparkSession at 0x116220f60>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spark"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"pages = spark.read.format(\"csv\").load(\"../data/w3data/pagerank/*pages.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"links = spark.read.format(\"csv\").load(\"../data/w3data/pagerank/*links.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"links=links.select(col(\"_c0\").alias(\"src\"), (col(\"_c1\").alias(\"dst\")))\n",
"pages=pages.select(col(\"_c0\").alias(\"id\"))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------------------------------------------------------------------------------+\n",
"|id |\n",
"+---------------------------------------------------------------------------------+\n",
"|https://w3.ibm.com/w3publisher/google-analytics/education-training/when-to-use-ga|\n",
"|https://w3.ibm.com/w3publisher/gts-efc/events/initiatives/balance-sheet-reporting|\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor |\n",
"|https://w3.ibm.com/w3publisher/gts-next/gts-narrative |\n",
"|https://w3.ibm.com/w3publisher/gbs-eu-pde/archive/consulting-fund-2 |\n",
"+---------------------------------------------------------------------------------+\n",
"only showing top 5 rows\n",
"\n"
]
}
],
"source": [
"pages.show(5, False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------------------------------------------------------------+-------------------------------------------------------------+\n",
"|src |dst |\n",
"+---------------------------------------------------------------+-------------------------------------------------------------+\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/why-we-patent |\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/awards |\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/ |\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|http://w3.ibm.com/w3/info_terms_of_use.html |\n",
"|https://w3.ibm.com/w3publisher/patents/research-master-inventor|https://w3.ibm.com/w3publisher/patents/tips-and-presentations|\n",
"+---------------------------------------------------------------+-------------------------------------------------------------+\n",
"only showing top 5 rows\n",
"\n"
]
}
],
"source": [
"links.show(5, False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a graphframe from vertices and edges"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"w3graph = GraphFrame(pages, links)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Run a Pagerank algorithm"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"w3pagerank = w3graph.pageRank(resetProbability=0.15, maxIter=5)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-----------------------------------------------------------------------------------------------------+------------------+\n",
"|id |pagerank |\n",
"+-----------------------------------------------------------------------------------------------------+------------------+\n",
"|https://w3.ibm.com/w3publisher |918.6350758663948 |\n",
"|https://w3.ibm.com/w3publisher/w3-privacy-notice/ |917.9575882531376 |\n",
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-3/consult-to-operate |29.492137597197754|\n",
"|https://w3.ibm.com/w3publisher/process-portal-benelux/process-portal-benelux/contact |28.53062710038013 |\n",
"|https://w3.ibm.com/w3publisher/ |27.380086802454347|\n",
"|https://w3.ibm.com/w3publisher/privacy-at-ibm/gdpr-awareness-course |26.689342455816792|\n",
"|https://w3.ibm.com/w3publisher/undefined/ |24.013627161928223|\n",
"|https://w3.ibm.com/w3publisher/undefined// |16.587042868259328|\n",
"|https://w3.ibm.com/w3publisher/hpc2018 |16.22722095383144 |\n",
"|https://w3.ibm.com/w3publisher/businessresourcegroups |15.559478172024198|\n",
"|https://w3.ibm.com/w3publisher/pps-shortcuts/ |13.204535900388832|\n",
"|https://w3.ibm.com/w3publisher/celebrations/ar-operations |12.726049892941768|\n",
"|https://w3.ibm.com/w3publisher/blog |11.593287201869359|\n",
"|https://w3.ibm.com/w3publisher/women-at-ibm |11.359712827645462|\n",
"|https://w3.ibm.com/w3publisher/new-collar/new-collar/meet-the-apprentices |11.112107646371827|\n",
"|https://w3.ibm.com/w3publisher/people-with-disabilities-gateway |10.843015784715224|\n",
"|https://w3.ibm.com/w3publisher/order-management-education/2019/undefined/ |10.79000282342931 |\n",
"|https://w3.ibm.com/w3publisher/tss-canada-quarterly-newsletter |10.704340508292939|\n",
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-4/iot-sensor-demo |10.669126926881551|\n",
"|https://w3.ibm.com/w3publisher/uaab4lh |10.608933822202024|\n",
"|https://w3.ibm.com/w3publisher/europe-gbs-red-hat-enablement |10.591033920619854|\n",
"|https://w3.ibm.com/w3publisher/ai-ibm/use-cases/undefined/use-cases |10.555391423742247|\n",
"|https://w3.ibm.com/w3publisher/sample-course/sales-opportunities%2Fiot-value-proposition |10.46516987243922 |\n",
"|https://w3.ibm.com/w3publisher/internet-of-things-sales-delivery-101-m56/m6nextsteps |10.19311456800684 |\n",
"|https://w3.ibm.com/w3publisher/ibm-cloud-bundles/undefined/ |10.033878023150816|\n",
"|https://w3.ibm.com/w3publisher/fastcloud/undefined/ |10.033878023150816|\n",
"|https://w3.ibm.com/w3publisher/pridegateway |9.879788844611001 |\n",
"|https://w3.ibm.com/w3publisher/veterans-gateway |9.762175399658117 |\n",
"|https://w3.ibm.com/w3publisher/hispanics-at-ibm-gateway |9.259024590999664 |\n",
"|https://w3.ibm.com/w3publisher/asian-employee-gateway |9.20173532785606 |\n",
"|https://w3.ibm.com/w3publisher/tap-education/introduction |9.07120765979852 |\n",
"|https://w3.ibm.com/w3publisher/talent-acquisition-for-hr-professionals/cognitive-solutions |9.025255534937381 |\n",
"|https://w3.ibm.com/w3publisher/native-americans-at-ibm-gateway |8.979308389889017 |\n",
"|https://w3.ibm.com/w3publisher/tap-education/unit-1 |8.755475070378825 |\n",
"|https://w3.ibm.com/w3publisher/meet-the-team |8.631114750692095 |\n",
"|https://w3.ibm.com/w3publisher/sales-and-delivery-101-mod-4/ibm-watson-iot-platform |8.571226920933485 |\n",
"|https://w3.ibm.com/w3publisher/zresilience-web/ |8.329998922120668 |\n",
"|https://w3.ibm.com/w3publisher/think2019/sales-resources |8.26912511795766 |\n",
"|https://w3.ibm.com/w3publisher/resources |8.262864092305383 |\n",
"|https://w3.ibm.com/w3publisher/black-gateway |8.121392329423122 |\n",
"|https://w3.ibm.com/w3publisher/pps-shortcuts/pps-contacts |8.107727069638473 |\n",
"|https://w3.ibm.com/w3publisher/sibos-2019 |7.9565126202668655|\n",
"|https://w3.ibm.com/w3publisher/inspire-me/inspire-me/events |7.877418776708795 |\n",
"|https://w3.ibm.com/w3publisher/elc-mentorship/find-a-mentor/undefined/find-a-mentor/business-mentors |7.877418776708795 |\n",
"|https://w3.ibm.com/w3publisher/byod/why-powerbi |7.688728592645118 |\n",
"|https://w3.ibm.com/w3publisher/ai-ibm/use-cases |7.661772852064592 |\n",
"|https://w3.ibm.com/w3publisher/gbs-l-k-spotlight-monthly-may-2018/gbs-l-k-spotlight-monthly-may-2018/|7.551928209198955 |\n",
"|https://w3.ibm.com/w3publisher/gts-mea-take-5/take-5-form |7.542097794489055 |\n",
"|https://w3.ibm.com/w3publisher/ces-2019/ginni-keynote |7.524301045618796 |\n",
"|https://w3.ibm.com/w3publisher/gbs-l-k-spotlight-monthly-jul-2018/ |7.338303965098289 |\n",
"+-----------------------------------------------------------------------------------------------------+------------------+\n",
"only showing top 50 rows\n",
"\n"
]
}
],
"source": [
"w3pagerank.vertices.orderBy(\"pagerank\", ascending=False).show(50, False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit ed5a4df

Please sign in to comment.