
graph_analysis3

import numpy as np
import pandas as pd
import networkx as nx
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import ttest_ind, norm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection on older Matplotlib

# Step 1: Generate Enhanced Canonical Graph Models
def generate_graphs(num_nodes=10):
    graphs = {
        "Erdos-Renyi": nx.erdos_renyi_graph(num_nodes, 0.5),
        "Preferential Attachment": nx.barabasi_albert_graph(num_nodes, 3),
        "Watts-Strogatz": nx.watts_strogatz_graph(num_nodes, 4, 0.2),
        "1D Lattice": nx.grid_graph(dim=[num_nodes]),
    }
    # Add edge weights to simulate latency or signal strength
    for graph in graphs.values():
        for u, v in graph.edges():
            graph[u][v]['weight'] = np.random.uniform(0.1, 1.0)  # Random weights
    return graphs

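# Illustrative usage sketch (the helper below is hypothetical, not part of the
# original workflow): a quick way to inspect the generated graphs and confirm
# they are connected, which the path-length feature in Step 2 requires.
def _preview_graphs():
    for name, g in generate_graphs(num_nodes=10).items():
        print(f"{name}: {g.number_of_nodes()} nodes, "
              f"{g.number_of_edges()} edges, connected={nx.is_connected(g)}")
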
# Step 2: Improved Feature Engineering
def create_dataset(graph):
    # Features: degree, clustering, betweenness centrality, average shortest path length
    degree = np.array([graph.degree[node] for node in graph.nodes()])
    clustering = np.array([nx.clustering(graph, node) for node in graph.nodes()])
    betweenness = np.array(list(nx.betweenness_centrality(graph).values()))
    # Global metric: computed once (it requires a connected graph) and
    # repeated for every node so it aligns with the per-node features
    avg_path = nx.average_shortest_path_length(graph)
    shortest_path = np.full(len(graph.nodes()), avg_path)

    # Combine features into a DataFrame
    features = np.stack((degree, clustering, betweenness, shortest_path), axis=1)

    # Target: Efficiency metric (e.g., inverse of latency)
    # Higher efficiency for shorter paths; note this is a single graph-level
    # value, so every node in a given graph shares the same target
    targets = 1 / (1 + avg_path)
    target_values = np.full(len(graph.nodes()), targets)

    return pd.DataFrame(features, columns=["Degree", "Clustering", "Betweenness", "ShortestPath"]), target_values

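# Illustrative usage sketch (hypothetical helper): shows the shape of the data
# create_dataset() produces. A path graph is used because it is guaranteed to
# be connected, so average_shortest_path_length is well-defined.
def _preview_dataset():
    g = nx.path_graph(10)
    X, y = create_dataset(g)
    print(X.head())  # one row per node, four feature columns
    print("Target:", y[0], "- the same value is shared by every node")
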
# Step 3: Visualize Graphs in 3D
def visualize_and_save(graph, name):
    pos = nx.spring_layout(graph, dim=3, seed=42)
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    for x, y, z in pos.values():
        ax.scatter(x, y, z, s=50)
    for u, v in graph.edges():  # draw each edge once
        ux, uy, uz = pos[u]
        vx, vy, vz = pos[v]
        ax.plot([ux, vx], [uy, vy], [uz, vz], color='gray', alpha=0.5)
    ax.set_title(name)
    plt.show()
    adj_matrix = nx.adjacency_matrix(graph).todense()
    return adj_matrix

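# Note: despite its name, visualize_and_save() only displays the figure and
# returns the adjacency matrix. A minimal sketch of actually persisting both
# (hypothetical helper; file names are illustrative):
def _save_outputs(fig, adj_matrix, name):
    fig.savefig(f"{name}_graph3d.png", dpi=150)
    np.savetxt(f"{name}_adjacency.csv", adj_matrix, delimiter=",", fmt="%.3f")
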
# Step 4: Train Model with Hyperparameter Tuning and Bell Curve
def train_and_evaluate(graphs, real_data=None):
    results = {}
    all_errors = []

    for name, graph in graphs.items():
        print(f"\nProcessing {name} graph...")

        # Visualize graph and capture its adjacency matrix
        adj_matrix = visualize_and_save(graph, name)

        # Create dataset
        X, y = create_dataset(graph)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Hyperparameter tuning for Random Forest
        param_grid = {'n_estimators': [100, 200], 'max_depth': [None, 5, 10]}
        model = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3)
        model.fit(X_train, y_train)
        best_model = model.best_estimator_

        # Evaluate model
        y_pred = best_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Collect residuals for bell curve
        residuals = y_test - y_pred
        all_errors.extend(residuals)

        # Compare with real data if provided
        if real_data is not None:
            _, p_value = ttest_ind(real_data, y_pred)
        else:
            p_value = 1.0  # Placeholder if no real data

        results[name] = {
            "MSE": mse,
            "R2": r2,
            "P-value": p_value,
            "Adjacency Matrix": adj_matrix
        }

        print(f"Results for {name}: MSE={mse:.4f}, R2={r2:.4f}, P-value={p_value:.4f}")

    # Plot bell curve for residuals
    plot_bell_curve(all_errors)
    return results

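# Illustrative sketch (hypothetical helper): GridSearchCV records the winning
# settings in best_params_ and the mean cross-validation score in best_score_,
# which could be logged alongside MSE/R2 inside train_and_evaluate().
def _report_search(model):
    print("Best hyperparameters:", model.best_params_)
    print("Mean CV score:", model.best_score_)
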
# Step 5: Bell Curve for Residuals
def plot_bell_curve(errors):
    mean = np.mean(errors)
    std_dev = np.std(errors)
    # Guard: with a constant per-graph target the residuals can have
    # (near-)zero variance, which would make the normal overlay degenerate
    if std_dev < 1e-12:
        print("Residuals have (near-)zero variance; skipping bell curve.")
        return
    x = np.linspace(mean - 4*std_dev, mean + 4*std_dev, 100)
    y = norm.pdf(x, mean, std_dev)

    plt.figure(figsize=(8, 5))
    plt.plot(x, y, label="Bell Curve", color='blue')
    plt.hist(errors, bins=20, density=True, alpha=0.6, color='orange', edgecolor='black', label="Residuals")
    plt.title("Bell Curve of Residuals")
    plt.xlabel("Residual Value")
    plt.ylabel("Density")
    plt.legend()
    plt.show()

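# Illustrative sketch (hypothetical helper, a complement not in the original
# workflow): the overlay above is a visual normality check; the Shapiro-Wilk
# test from scipy.stats gives a formal one-line counterpart.
def _normality_test(errors):
    from scipy.stats import shapiro
    stat, p = shapiro(errors)
    print(f"Shapiro-Wilk: statistic={stat:.4f}, p-value={p:.4f}")
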
# Main Workflow
if __name__ == "__main__":
    # Generate Graphs
    graphs = generate_graphs(num_nodes=15)

    # Simulated real data for comparison (e.g., NASA metrics)
    real_data = np.random.uniform(0.1, 0.9, 20)  # Simulated real-world values

    # Train, Evaluate, and Compare
    results = train_and_evaluate(graphs, real_data=real_data)

    # Summary
    print("\nFinal Results:")
    for graph, metrics in results.items():
        print(f"{graph}: {metrics}")
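# Reproducibility note: the random graph generators and edge weights above are
# not seeded, so metrics vary from run to run. Seeding NumPy (np.random.seed(42))
# and passing seed= to the NetworkX generators would make runs repeatable.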
