RDKit: How to show a molecule's atom numbers - numbers

Hello!
I was trying to use the rdkit package to display a molecule's atom numbers in a Jupyter Notebook. I used "import IPython.core.interactiveshell", "import InteractiveShell" and "from rdkit.Chem.Draw import DrawingOptions", and then set "DrawingOptions.includeAtomNumbers=True", but the result did not display the atom indices at all.
I don't know why the atom numbers are not shown.
Could you please suggest a suitable answer?
Thanks a lot!

There are three ways to show atom numbers in the molecule.
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
def show_atom_number(mol, label):
    """Write each atom's index plus one into the atom property ``label``.

    Args:
        mol: Object exposing ``GetAtoms()``; each atom must provide
            ``GetIdx()`` and ``SetProp(name, value)``. The atoms are
            modified in place.
        label: Name of the atom property to set. The surrounding examples
            pass ``'atomLabel'``, ``'molAtomMapNumber'`` and ``'atomNote'``.

    Returns:
        The same ``mol`` object that was passed in.
    """
    for atom in mol.GetAtoms():
        # The value is stored as a string: str(index + 1).
        atom.SetProp(label, str(atom.GetIdx() + 1))
    return mol
1. In place of the atoms
mol = Chem.MolFromSmiles('c1ccccc(C(N)=O)1')
show_atom_number(mol, 'atomLabel')
2. Along with the atoms
mol = Chem.MolFromSmiles('c1ccccc(C(N)=O)1')
show_atom_number(mol, 'molAtomMapNumber')
3. On top of the atoms
mol = Chem.MolFromSmiles('c1ccccc(C(N)=O)1')
show_atom_number(mol, 'atomNote')
If you want to change the numbers that are displayed, change the part str(atom.GetIdx()+1) as per your requirements. Check out my blog post on the same topic for a more detailed explanation here

This works for me in a Jupyter Notebook:
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
smiles = 'O=C(C)Oc1ccccc1C(=O)O'
mol = Chem.MolFromSmiles(smiles)
Draw.MolToImage(mol, includeAtomNumbers=True)
Update
As of version 2020.03.1 this does not work anymore.
But you can annotate atoms directly.
from rdkit import Chem
smiles = 'O=C(C)Oc1ccccc1C(=O)O'
mol = Chem.MolFromSmiles(smiles)
# Store every atom's index (as returned by GetIdx()) in its 'atomLabel'
# property, as a string.
for atom in mol.GetAtoms():
    atom.SetProp('atomLabel', str(atom.GetIdx()))

In version 2020.03.2.0 you could try
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit import Chem
mol = Chem.MolFromSmiles('c1ccccc1O')
# 250 x 200 drawing canvas.
d = rdMolDraw2D.MolDraw2DCairo(250, 200)
# Turn on the drawer option that adds atom indices to the picture.
d.drawOptions().addAtomIndices = True
d.DrawMolecule(mol)
d.FinishDrawing()
# The drawing text is written in binary mode to a .png file.
with open('atom_annotation_1.png', 'wb') as f:
    f.write(d.GetDrawingText())

Related

nearest building with open street map

I have a CSV of relevant points with latitude and longitude, and I am trying to get the nearest
building data for each point and add it as a column to the CSV (or pandas DataFrame) in Python. I tried using Pyrosm and various other libraries, but I can't seem to prune the data to get the nearest building and then add the data. Thanks
This is what I have
from pyrosm import OSM
from pyrosm import get_data
import geopandas as gpd
from sklearn.neighbors import BallTree
import numpy as np
import osmnx as ox
# get rid of weird error
import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
import csv
def get_gig_data(csv_fname):
    """Lazily yield the rows of the CSV file at ``csv_fname``.

    Args:
        csv_fname: Path of the CSV file; it is decoded as latin-1.

    Yields:
        One list of strings per CSV record, in file order. The file stays
        open only while the generator is being consumed.
    """
    # newline="" hands raw line endings to the csv module, which is what its
    # documentation asks for so that newlines inside quoted fields survive.
    with open(csv_fname, "r", encoding="latin-1", newline="") as gig_records:
        yield from csv.reader(gig_records)
def main():
    """Load Chicago OSM data and look up the nearest graph node per gig record.

    Takes no arguments and returns nothing. It reads ``data_sample.csv`` via
    ``get_gig_data`` and prints the parsed coordinate tuple of every record.
    """
    warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)
    chicago_osm = OSM(get_data("chicago"))
    #get a Point of Interest GeoDataFrame
    points_of_interest = chicago_osm.get_pois() #can use a custom filter if we want to filter the types, but I think no filter might be the best
    # get buildings nodes and edges
    nodes, edges = chicago_osm.get_network(nodes=True, network_type="walking")
    buildings = chicago_osm.get_buildings()
    b_cnt = len(buildings)
    # NOTE(review): to_graph() is called without graph_type, while the osmnx
    # lookup below expects a networkx graph -- confirm the returned type
    # (pyrosm documents graph_type="networkx" for that case).
    G = chicago_osm.to_graph(nodes, edges)
    #nodes = get_igraph_nodes(G)
    # Replace each building footprint with its centroid point.
    buildings['geometry'] = buildings.centroid
    # poi_list = np.asarray([ point.coords for point in points_of_interest['geometry'] ]) #if point.geom_type == point])
    #print(poi_list.shape)
    #tree = BallTree( np.asarray([ point.coords for point in points_of_interest['geometry'] if point.geom_type == point]), metric="manhattan") #Note: the scipy implementation of manhattan/cityblock distance might be faster according to the internet bc it uses a C function
    #Read in the gig work data - I think the best way to do this will probably be with the CSV.reader with open thing because it will go line by line and save a ton of memory
    # for i in points_of_interest:
    #     print('Type: ', type(i) , ' ',i)
    gig_fp = "data_sample.csv"
    #gig_data = gpd.read_file(gig_fp)
    iter_gig = iter(get_gig_data(gig_fp))
    # Discard the first record (presumably the header row).
    next(iter_gig)
    ids=dict()
    for building in buildings.iterrows():
        #print(type(building[1][32]) , ' ', building[1][32])
        #tup = tuple(float(x) for x in [trip[17][8:-1].split()])
        # building is an (index, row) pair; the row's value at position 32 is
        # used as the dictionary key.
        ids[building[1][32]] = building
    #make the tree that determines closest POI
    #if we use the CSV reader this for loop will be done already
    for trip in iter_gig:
        # Using generator so this should be efficient memory wise.
        tup = tuple([float(x) for x in trip[17][8:-1].split() ])
        print(type(tup), ' ', tup)
        # nearest_nodes takes the X and Y coordinates as separate arguments
        # and only returns the distance as well when return_dist=True.
        # NOTE(review): assumes tup is (x, y), i.e. (longitude, latitude) --
        # confirm against the layout of CSV column 17.
        src_ids, euclidean_distance = ox.distance.nearest_nodes(
            G, tup[0], tup[1], return_dist=True
        )
        # find nearest node
        #THEN ADD THE PICKUP AND DROPOFF IDS TO THIS TUPLE AND ADD TO A NEW NP ARRAY
if __name__ == '__main__':
    main()

Can I draw a bipartite graph from every dataset?

I am trying to draw a bipartite graph for my data set, which is like below:
source target weight
reduce energy 25
reduce consumption 25
energy pennsylvania 4
energy natural 4
consumption balancing 4
The code with which I am trying to plot the graph is below:
C_2021 = nx.Graph()
C_2021.add_nodes_from(df_final_2014['source'], bipartite=0)
C_2021.add_nodes_from(df_final_2014['target'], bipartite=1)
edges = df_final_2014[['source', 'target','weight']].apply(tuple, axis=1)
C_2021.add_weighted_edges_from(edges)
But when I check with the below code whether it is bipartite or not, I get the "False" feedback.
nx.is_bipartite(C_2021)
Could you please advise what the issue is?
The previous issue is resolved, but when I try to plot the bipartite graph with the steps below, I do not get a proper result. If someone could help me, I would appreciate it:
top_nodes_2021 = set(n for n,d in C_2021.nodes(data=True) if d['bipartite']==0)
top_nodes_2021
the output of the above is:
{'reduce'}
bottom_nodes_2021 = set(C_2021) - top_nodes_2021
bottom_nodes_2021
the output of the above is:
{'balancing', 'consumption', 'energy', 'natural', 'pennsylvania '}
then plot it by:
pos = nx.bipartite_layout(C_2021,top_nodes_2021)
plt.figure(figsize=[8,6])
# Pass that layout to nx.draw
nx.draw(C_2021,pos,node_color='#A0CBE2',edge_color='black',width=0.2,
edge_cmap=plt.cm.Blues,with_labels=True)
and the result is:
It works for me using your code. nx.is_bipartite(C_2021) returns true. Check the example below:
import sys
# StringIO lives in a different module on Python 2 and Python 3.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
import networkx as nx
import pandas as pd
# In-memory copy of the question's table, ';'-separated.
data = StringIO('''source;target;weight
reduce;energy;25
reduce;consumption;25
energy;pennsylvania ;4
energy;natural;4
consumption;balancing;4
''')
df_final_2014 = pd.read_csv(data, sep=";")
C_2021 = nx.Graph()
# A node listed in both columns ends up with bipartite=1, because the second
# add_nodes_from call sets the attribute again.
C_2021.add_nodes_from(df_final_2014['source'], bipartite=0)
C_2021.add_nodes_from(df_final_2014['target'], bipartite=1)
# One (source, target, weight) tuple per row.
edges = df_final_2014[['source', 'target','weight']].apply(tuple, axis=1)
C_2021.add_weighted_edges_from(edges)
nx.is_bipartite(C_2021)
Finally, to draw them, get the bipartite sets. The data you passed during creation is wrong (i.e. bipartite=0 and bipartite=1).
Use the following commands:
from networkx.algorithms import bipartite
top_nodes_2021, bottom_nodes_2021 = bipartite.sets(C_2021)
pos = nx.bipartite_layout(C_2021, top_nodes_2021)
plt.figure(figsize=[8,6])
# Pass that layout to nx.draw
nx.draw(C_2021,pos,node_color='#A0CBE2',edge_color='black',width=0.2,
edge_cmap=plt.cm.Blues,with_labels=True)
With the following result:

Saving a trained Detectron2 model and making predictions on a single image

I am new to detectron2 and this is my first project. After reading the docs and using the tutorials as a guide, I trained my model on the custom dataset and performed the evaluation.
I would now like to make predictions on images I receive via an API by loading this saved model. I could not find any reading materials that could help me with this task.
To save my model, I have used this link as a reference - https://detectron2.readthedocs.io/en/latest/tutorials/models.html
I am able to save my trained model using the following code-
from detectron2.modeling import build_model
model = build_model(cfg) # returns a torch.nn.Module
from detectron2.checkpoint import DetectionCheckpointer
checkpointer = DetectionCheckpointer(model, save_dir="output")
checkpointer.save("model_final") # save to output/model_final.pth
But I am still confused as to how I can go about implementing what I want. I could use some guidance on what my next steps should be. Would be extremely grateful to anyone who can help.
For a single image, create a list of data. Put the image path in file_name as below:
test_data = [{'file_name': '.../image_1jpg',
'image_id': 10}]
Then run the following:
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer, ColorMode
import matplotlib.pyplot as plt
# Import the top-level package: the cv2.cv2 submodule is not importable in
# recent opencv-python releases.
import cv2
test_data = [{'file_name': '.../image_1jpg',
              'image_id': 10}]
cfg = get_cfg()
cfg.merge_from_file("model config")
cfg.MODEL.WEIGHTS = "model_final.pth" # path for final model
predictor = DefaultPredictor(cfg)
im = cv2.imread(test_data[0]["file_name"])
outputs = predictor(im)
# The channel axis of the loaded image is reversed before it is handed to
# the Visualizer.
v = Visualizer(im[:, :, ::-1],
               metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
               scale=0.5,
               instance_mode=ColorMode.IMAGE_BW)
# Predictions are moved to the CPU before drawing.
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
img = cv2.cvtColor(out.get_image()[:, :, ::-1], cv2.COLOR_RGBA2RGB)
plt.imshow(img)
This will show the prediction for the single image

Bokeh server plot does not update

LS,
None of the answers on this topic helped me solve my update problem. I think it has to do with the dfs = df.sort_values(by=['E']). I use the latest versions of all the libraries. The examples on the bokeh website work fine on my configuration. Via an update button I want to allow the user to select the preferred sort order. The two other sort buttons will be added when this part works.
Here is my code:
import pandas as pd
from bokeh.plotting import figure, curdoc
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.models import Button
df = pd.DataFrame(dict(A=["AMS", "LHR", "FRA", "PTY", "CGD"], S=[7,-5,-3,3,2], E=[8,3,-2,5,8], C=[5,2,7,-3,-4]))
source = ColumnDataSource(df)
options = dict(plot_width=300, plot_height=200,
tools="pan,wheel_zoom,box_zoom,box_select,lasso_select")
button = Button(label="Change Sort to E")
p1 = figure(y_range=source.data['A'].tolist(), title="S", **options)
p1.hbar(y='A', right="S", height=0.2, source=source)
p2 = figure(y_range=source.data['A'].tolist(), title="E", **options)
p2.hbar(y="A", right="E", height=0.2, source=source)
p3 = figure(y_range=source.data['A'].tolist(), title="C", **options)
p3.hbar(y="A", right="C", height=0.2, source=source)
def update():
    """Button callback: push the data, sorted by column ``E``, to ``source``.

    Reads the module-level ``df`` and replaces ``source.data``; takes no
    arguments and returns nothing.
    """
    dfs = df.sort_values(by=['E'])
    source.data = ColumnDataSource.from_df(dfs)
button.on_click(update)
p = gridplot([[button], [p1, p2, p3]], toolbar_location="right")
curdoc().add_root(p)
I run the server via: bokeh serve --show app.py --port 5009
Thank you very much for making the update work.
If you want to change the order of things on a categorical axis, you have to update the range. The order of the factors on the axis is specified exactly by the order of factors you configure for the range, so you will need to re-order the factors to match the sort order you want. So, something like:
p1.y_range.factors = new_sorted_factors
See
https://docs.bokeh.org/en/latest/docs/user_guide/categorical.html#sorted
for a complete (standalone) example.

The code will only work once

The code I am using with networkx seems to only work once per time I run it. I'm not sure why this is at all.
import networkx as nx
import matplotlib.pyplot as plt
edgelist=[1,2,1,3,1,4,1,5,1,2,4,5,2,5,2,5,6,3]
e=edgelist
def global_clustering(list_of_edges):
    """Forward ``list_of_edges`` to ``global_cluster`` and return its result."""
    return global_cluster(list_of_edges)
def makegraph(g):
    """Build an undirected graph from a flat list of node pairs.

    ``g`` is consumed two items at a time: ``[a, b, c, d]`` adds the edges
    a-b and c-d. The list passed in is emptied by this call.

    Returns:
        The populated ``nx.Graph``.
    """
    graph1 = nx.Graph()
    # NOTE(review): nodes() is not defined in the visible code -- confirm
    # where it comes from.
    graph1.add_nodes_from(nodes(g))
    # g1 is a second name for the same list object, not a copy, so the
    # ``del`` below also shortens the list the caller passed in.
    g1=g
    while len(g1)>0:
        graph1.add_edge(g1[0], g1[1])
        del g1[0:2]
    return graph1
def global_cluster(list1):
    """Print the transitivity of the graph built from ``list1``.

    The graph is built with ``makegraph``; nothing is returned.
    """
    graph=makegraph(list1)
    # Call form of print: valid on both Python 2 and Python 3.
    print(nx.transitivity(graph))
Really, I just want to find the transitivity of the graph, otherwise known as global clustering coefficient. It's a function which comes with networkx.
Thanks for the help
I think something like this is what you want:
import networkx as nx
edgelist=[1,2,1,3,1,4,1,5,1,2,4,5,2,5,2,5,6,3]
edges = zip(edgelist[::2],edgelist[1::2])
graph = nx.Graph(edges)
print(nx.transitivity(graph))