Python

From bernie's wiki
Jump to: navigation, search

Elections 2017 scraping (French presidential results site)

# # -*- coding: utf-8 -*-

from urllib import urlopen
import re
import urlparse
import os

baseUrl = 'http://elections.interieur.gouv.fr/presidentielle-2017/'
townUrlsFile = os.path.dirname(os.path.realpath(__file__))+'/town_urls.txt'
#print townUrlsFile

def getTownUrlsList():
  with open(townUrlsFile, 'w') as fid:
    count = 0
    townlist = []
    #get the page
    page = urlopen(baseUrl+'index.html')
    page_content_HTML = page.read()

    #grab the list
    start = 'selected>Choisir un département</option>'
    end = '</select><br><p class="clic-carte">'
    departement_HTML = (page_content_HTML.split(start))[1].split(end)[0]

    #iterate through departments (options)
    options = re.findall(r'<option value="(.*)">(.*)</option>',departement_HTML,re.M)
    for option in options:
      
      #get the page
      page = urlopen(baseUrl+option[0])
      page_content_HTML = page.read()
      
      #grab the list of town letters
      start = 'initiale</i><br>'
      end = '\xa0\n\t\t\t<hr>\n</div></div>\n<div class="row-fluid pub-index-communes">'
      town_letters_HTML = (page_content_HTML.split(start))[1].split(end)[0]
      
      #iterate through town letters (A = all towns with A in this departement etc...)
      town_letters = re.findall(r'<a href="../../(.*)">(.*)</a>',town_letters_HTML,re.M)
      for town_letter in town_letters:
	page = urlopen(baseUrl+town_letter[0])
	page_content_HTML = page.read()
	
      
	#grab the list of towns
	start = 'tableau-communes"><tbody>'
	end = '</tbody></table>\n<br>\n</div></div>\n</div>\n<div class="row-fluid pub-bas">\n<div class="span5">'
	towns_HTML = (page_content_HTML.split(start))[1].split(end)[0]
	
	#print towns_HTML
	towns = re.findall(r'<tr><td><a href="../../(.*)">(.*)</a>',towns_HTML,re.M)      
	#iterate through towns
	for town in towns:
	  currentTown = option[1]+'|'+town[1]+'|'+baseUrl+town[0]
	  #townlist.append()
	  fid.write(currentTown+'\n')
	  count = count + 1
	print count

  #return townlist


#fid.write('\n'.join(getTownUrlsList()))
#fid.close()  
# Entry point: one full scrape run (network-heavy) writing town_urls.txt.
getTownUrlsList()
# # -*- coding: utf-8 -*-

from urllib import urlopen
import re
import urlparse
import os

# One-off probe of a single departement page (011/075 = Paris): extract the
# per-arrondissement (or per-initial-letter) link block and print each link's
# '../../'-relative href/label split.
page = urlopen('http://elections.interieur.gouv.fr/presidentielle-2017/011/075/index.html')
page_content_HTML = page.read()

#grab the list of town letters
# Paris-style pages list arrondissements; other departements list town
# initial letters, so the split anchors differ.
if 'arrondissement</i></p>' in page_content_HTML:
  start = 'arrondissement</i></p>'
  end = '<div class="row-fluid pub-resultats-entete'
else:
  start = 'initiale</i><br>'
  end = '\xa0\n\t\t\t<hr>\n</div></div>\n<div class="row-fluid pub-index-communes">'
town_letters_HTML = (page_content_HTML.split(start))[1].split(end)[0]

# NOTE(review): assumes every '</a> <a href'-separated chunk contains
# '../../'; an IndexError here means the markup changed — confirm before reuse.
for arrondissement in town_letters_HTML.split('</a> <a href'):
  print arrondissement.split('../../')[1].split('">')

#town_letters = re.findall(r'<a href="../../(.*)">(.*)</a>',town_letters_HTML,re.M)
#for town_letter in town_letters:
  #print town_letter
  

ColourLovers colour-palette scraping

# # -*- coding: utf-8 -*-

from urllib.request import urlopen
import os
import codecs
import math
import time



def grabCL(n):
	"""Download page *n* of ColourLovers' most-loved palette listing and save
	the raw response to output<n>.txt.

	NOTE(review): str(page_content) writes the Python repr of the bytes
	object (leading b'...', backslash-escaped quotes).  parseCL() is written
	against exactly that format, so the repr is kept deliberately.

	Fix over the original: the explicit fid.close() inside the with-block
	was redundant (the context manager closes the file) and is removed.
	"""
	url = "http://www.colourlovers.com/ajax/browse-palettes/_page_"+str(n)+"?section=most-loved&period=all-time&view=meta&channelID=0"
	page = urlopen(url)
	page_content = page.read()

	with open('Z:/BERNIE/vvvv/palettes/cl/output'+str(n)+'.txt', 'w') as fid:
		fid.write(str(page_content))

def parseCL(n):
	"""Parse the saved output<n>.txt listing into palette records.

	Returns one string containing, per palette: a header line
	'<running palette index> <title>' followed by five
	'<#hexcolor> <width fraction>' lines.  Parsing is pure string-splitting
	against the ColourLovers 'meta' view markup as grabCL saved it
	(the str() of the raw bytes, hence the backslash stripping below).
	"""
	output = ""
	titles = []
	with open('Z:/BERNIE/vvvv/palettes/cl/output'+str(n)+'.txt', 'r') as fid:
		for line in fid:
			# Titles: the text fragment just before each palette's
			# username div, i.e. the tail of each split token after '">'.
			tokens = line.split("</a></h3><div class=\"left username\"")
			p = len(tokens)
			for i in range(p):

				tokensTitle = tokens[i].split("\">")
				titles.append(tokensTitle[-1])

			#get colors
			# j advances by 10 per palette while only 5 entries are read —
			# presumably each palette contributes 10 '<span class="c"'
			# fragments in this view; confirm against the markup.
			lines = line.split("<span class=\"c\" style=\"width: ")

			j = 1
			while j<len(lines):
				
				#print(titles[(int((j-1)/10))])
				# Header line: global palette index, offset by 15 per page.
				output += "\n"+str(int((j-1)/10)+(n-1)*15)+" "+titles[(int((j-1)/10))].replace("\\", "")+"\n"

				for k in range(5):
					curline = lines[j+k]
					# Bar width in px (out of 560 total) -> fraction.
					widthTokens = curline.split("px; height: 50px;")
					width = widthTokens[0]
					# Hex colour is the last 7 chars before the style tail.
					colorTokens = curline.split(";\"><span class=\"s\" style=\"margin-top: 45px;\">")
					color = colorTokens[0][-7:]
					
					colorString = color+" "+str(float(width)/560)[:6]
					
					#print(colorString)
					output += colorString+"\n"
				#output += "\n"

				j = j + 10
	return output
	
def scrapeCL(startPage, endPage, waitInSeconds):
	"""Fetch, parse, and append pages startPage..endPage (inclusive) to the
	master colour file, sleeping waitInSeconds between pages to stay polite."""
	page = startPage
	while page <= endPage:
		grabCL(page)
		parsed = parseCL(page)
		with open("Z:/BERNIE/vvvv/palettes/colors_cl.txt", "a") as myfile:
			myfile.write(parsed)
		print("Page "+str(page)+" grabbed... "+str(page*15)+" records")
		time.sleep(waitInSeconds)
		page += 1

# Full crawl: pages 1-270 with a 1-second pause between requests.
scrapeCL(1,270,1)

Raytracer

from PIL import Image
from math import sqrt

# Output canvas dimensions and image.
imwidth = 640
imheight = 480
im = Image.new("RGB",(imwidth,imheight),"black")
#im = Image.open("lolmonkey.jpg")
#im.show()
#im.save("hellwrld.png","PNG")
#NUMPY

# Scene sphere as [center, radius term].  intersectRaySphere subtracts this
# value un-squared in the quadratic's constant term, so it behaves as r^2 —
# confirm whether 200 was meant as the radius or the squared radius.
mysphere = [[0,0,0],200]
#myray = [[0,0,10],[0,0,-1]]

def intersectRaySphere(ray,sphere):
	"""Intersect ray [origin, direction] with sphere [center, r2].

	sphere[1] is used directly as the quadratic's constant-term subtrahend,
	i.e. it is treated as the squared radius.

	Returns (matching the original's shapes — the render loop only tests len()):
	  []          no intersection,
	  [[x,y,z]]   tangent hit (single root),
	  [x,y,z]     the nearer of two hits, i.e. the one with the smaller t.

	BUG FIX: the original picked between the two hit points by comparing the
	3-D point lists lexicographically (points[0] > points[1]), which selects
	by coordinate ordering rather than by distance along the ray and usually
	returned the *far* intersection.  The nearer hit is the one with the
	smaller ray parameter t, so we now select on t.
	"""
	ox, oy, oz = ray[0]
	dx, dy, dz = ray[1]
	cx, cy, cz = sphere[0]

	# Quadratic |O + t*D - C|^2 = r^2  ->  A t^2 + B t + C = 0
	A = dx*dx + dy*dy + dz*dz
	B = 2.0 * (dx*(ox-cx) + dy*(oy-cy) + dz*(oz-cz))
	C = (ox-cx)*(ox-cx) + (oy-cy)*(oy-cy) + (oz-cz)*(oz-cz) - sphere[1]

	delta = B*B - 4.0*A*C
	if delta < 0:
		return []

	if delta == 0:
		ts = [-B/(2.0*A)]
	else:
		root = sqrt(delta)
		ts = [(-B+root)/(2.0*A), (-B-root)/(2.0*A)]

	points = [[ox + t*dx, oy + t*dy, oz + t*dz] for t in ts]

	if len(points) == 2:
		# Nearer intersection = smaller t along the ray direction.
		return points[0] if ts[0] < ts[1] else points[1]
	return points

	
def mag(vec):
	"""Euclidean length of a 3-vector."""
	x, y, z = vec[0], vec[1], vec[2]
	return sqrt(x*x + y*y + z*z)

def cross(vec1,vec2):
	"""Right-handed cross product of two 3-vectors."""
	a1, a2, a3 = vec1[0], vec1[1], vec1[2]
	b1, b2, b3 = vec2[0], vec2[1], vec2[2]
	return [a2*b3 - a3*b2, a3*b1 - a1*b3, a1*b2 - a2*b1]
	
def normalize(vec):
	"""Return vec scaled to unit length.

	The original recomputed mag(vec) — a sqrt of three products — once per
	component; the length is invariant, so compute it once.  Raises
	ZeroDivisionError for the zero vector, same as before.
	"""
	length = sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2])
	return [vec[0]/length, vec[1]/length, vec[2]/length]
	
def dot(vec1,vec2):
	"""Scalar (dot) product of the first three components of two vectors."""
	return sum(vec1[k]*vec2[k] for k in range(3))

def pixToPoint(i,j,width,height,xPixSize,yPixSize,center,u,v):
	"""Map pixel (i, j) of an xPixSize-by-yPixSize grid onto the view plane.

	The pixel-centre offsets cu, cv sweep [-width/2, width/2] and
	[-height/2, height/2]; the returned point is center + cu*u + cv*v.
	"""
	cu = ((2.0*i + 1.0)/(2*xPixSize) - .5) * width
	cv = ((2.0*j + 1.0)/(2*yPixSize) - .5) * height
	return [center[k] + cu*u[k] + cv*v[k] for k in range(3)]

# --- camera setup ---
lookat = [0,0,0]
eye = [100,100,100]
f = 10          # distance from the basis origin to the view-plane centre
upvector = [0,1,0]
viewplaneW = imwidth/2
viewplaneH = imheight/2

# Eye-to-lookat vector, its length, and its unit form.
# NOTE(review): center is offset from EA (a lookat-relative vector), not from
# eye — confirm whether eye + normEA*f was intended.
EA = [lookat[0]-eye[0],lookat[1]-eye[1],lookat[2]-eye[2]]
lenEA = mag(EA)
normEA = [EA[0]/lenEA,EA[1]/lenEA,EA[2]/lenEA]
center = [EA[0]+normEA[0]*f, EA[1]+normEA[1]*f, EA[2]+normEA[2]*f]

# Camera basis: w along the view direction, u and v spanning the view plane.
w = normEA
u = normalize(cross(upvector,w))
v = normalize(cross(u,w))
#print(cross([1,0,0],[0,1,0]))

light = [0,0,100]


#print intersectRaySphere(myray,mysphere)

# --- render loop: one primary ray per pixel, eye through view-plane point ---
for x in range(imwidth):
	for y in range(imheight):
		#myray = [[x,y,-10],[0,0,1]]
		point = pixToPoint(x,y,imwidth,imheight,viewplaneW,viewplaneH,center,u,v)
		ray = [point,[point[0]-eye[0] , point[1]-eye[1] , point[2]-eye[2]]]
		if(len(intersectRaySphere(ray,mysphere))):
			# NOTE(review): n is built from the view-plane point, not from
			# the intersection point the call above returned — confirm intent.
			n = normalize([point[0]-mysphere[0][0], point[1]-mysphere[0][1],point[2]-mysphere[0][2]])
			i = normalize([light[0]-point[0], light[1]-point[1],light[2]-point[2]])
			costheta = dot(n,i)
			#if(costheta<0):
			#	costheta=0
			#color = int(costheta*255)

			#print n[0]
			#print costheta
			#print color
			# Shade with the negated normal; components are not clamped, so
			# values can fall outside 0..255.
			im.putpixel((x,y),(int(-n[0]*255),int(-n[1]*255),int(-n[2]*255)))
			#im.putpixel((x,y),(255,255,0))
#im.show()
im.save("sphr_"+str(mysphere[1])+".png","PNG")



File Handling Dandelion

import os
import re
def listdirs(folder):
    """Return the names of folder's immediate subdirectories."""
    subdirs = []
    for entry in os.listdir(folder):
        if os.path.isdir(os.path.join(folder, entry)):
            subdirs.append(entry)
    return subdirs

paths = 'N:/01_OUT'

pattern = 'GB\d+_SC\d+.*_T\d+'
#text = 'GB45_SC34_T3'
#match = re.search(pattern, text)
#print match
f = open('N:/01_OUT/summary.html', 'w')
#f.write('0123456789abcdef')
count = 0
for dir in listdirs(paths):
	f.write("<hr>\n\n</br></br>"+dir+"</br>")
	subdir = listdirs(paths+"/"+dir)
	for takes in subdir:
		tk = listdirs(paths+"/"+dir+"/"+takes)
		#if(len(tk)>4):
			#f.write("\n"+paths+"/"+dir+"/"+takes)
		f.write("\n"+paths+"/"+dir+"/"+takes+"</br>")
		for take in tk:
			match = re.search(pattern,take)
			if(match != "None"):
				count+=1
				if(count % 2 == 1):
					c = "#fbfbfb";
				else:
					c = "#eeeeee";
				f.write("\n<div style='background-color:"+c+";'>          <input type=checkbox name=\""+(paths+"/"+dir+"/"+takes)+"\" CHECKED>"+take+"</div>")
				print take
				#print takes+": "+str(len(tk))
				#print take+" ("+str(len(tk))+")"
f.close();
raw_input("-")




#for dir in os.listdir(path):
#	for subdir in os.listdir(path+"/"+dir):
#		takes = os.listdir(path+"/"+dir+"/"+subdir)
#		directories=[d for d in os.listdir(path+"/"+dir+"/"+subdir) if os.path.isdir(d)]
#		#print subdir+":"+str(len(takes))
#		print directories
#
#raw_input("Press ENTER to exit")