WordPress error 500 & the WordPress path

usually this just means a plugin crashed

go to the WordPress install path

/opt/bitnami/apps/wordpress/htdocs   (Bitnami; plugins live under wp-content/plugins)

/var/www/html/wp-content/plugins   (standard install)

find the plugins folder and rename the folder of any plugin you suspect; renaming its directory deactivates it
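
for example (the plugin folder name here is hypothetical):

$ cd /var/www/html/wp-content/plugins
$ mv some-plugin some-plugin.disabled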

go back to your website; if the error is gone, rename the folders back one at a time to find the culprit


change WordPress files deployed by Bitnami on AWS

find the path of WordPress:

/opt/bitnami/apps/wordpress/htdocs

examine the file structure (apt-get install tree first, then run tree -L 1):

├── index.php
├── license.txt
├── readme.html
├── wp-activate.php
├── wp-admin
├── wp-blog-header.php
├── wp-comments-post.php
├── wp-config.php
├── wp-config-sample.php
├── wp-content
├── wp-cron.php
├── wp-includes
├── wp-links-opml.php
├── wp-load.php
├── wp-login.php
├── wp-mail.php
├── wp-settings.php
├── wp-signup.php
├── wp-trackback.php
└── xmlrpc.php

the wp-content folder contains the themes and plugins that can be modified.


Bitnami AWS server MySQL config

if you launch a Bitnami WordPress server on AWS,

its MySQL, Apache, and PHP-FPM are stored in different locations compared to an apt-get install,

and they are monitored and managed by Bitnami's control script.

so, go to

/opt/bitnami

./ctlscript.sh          (run with no arguments to print usage)

./ctlscript.sh status   (show the state of each service)

if you want to restart mysql

./ctlscript.sh restart mysql

Or

use the mysqladmin command

$ mysqladmin
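
e.g. check the server status (you will be prompted for the root password):

$ mysqladmin -u root -p status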

MySQL add user privilege

Linux MySQL config: allow remote connections

go to /etc/mysql/mysql.conf.d/

$ vim mysqld.cnf

comment out

bind-address = 127.0.0.1

to

# bind-address = 127.0.0.1
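
then restart MySQL so the change takes effect:

$ sudo service mysql restart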

 

log in to mysql

$ mysql -u root -p        (add -h localhost to be explicit about the host)
Enter password:

if you want to create a user fun with password love who can access the database from anywhere:

> create user 'fun'@'%' identified by 'love';

then

> grant all privileges on *.* to 'fun'@'%' with grant option;

>flush privileges;

 

mysql> GRANT ALL ON *.* TO 'root'@'192.168.1.4' IDENTIFIED BY 'your-root-password';

(the same grant, but restricted to a single client IP)

if you forget the MySQL root password

go to MySQL's config directory,

e.g.

/etc/mysql/mysql.conf.d/

or if you are using bitnami

/opt/bitnami/mysql/

vim mysqld.cnf   (my.cnf on Bitnami)

under [mysqld]

add

skip-grant-tables

Now you can log in to MySQL without a password and change the root password, e.g.

mysql> use mysql;

mysql>update user set password=password('newpassword') where user= 'root';

mysql>flush privileges;

difficulty encountered

in Bitnami's MySQL, the user table does NOT have a 'password' column; instead it has 'authentication_string' for the password, so:

mysql>update user set authentication_string=password('newpassword') where user= 'root';

mysql>flush privileges;

password('string') is a function that hashes the string into a safer representation.

then go back to the MySQL config file, delete skip-grant-tables,

and restart MySQL

 

pandas read_csv() parameters explained (translated from Chinese)

pandas.read_csv parameter summary
Reads a CSV (comma-separated) file into a DataFrame.
Also supports partial file loading and selective iteration.
Parameters:
filepath_or_buffer : str, pathlib.Path, py._path.local.LocalPath or any object with a read() method (such as a file handle or StringIO)
Can be a URL; valid URL schemes include http, ftp, s3, and file. Support for multiple files is in the works.
Local file example: file://localhost/path/to/table.csv
sep : str, default ','
Delimiter to use. If not specified, comma is tried. Separators longer than one character (and different from '\s+') are interpreted as regular expressions and force the Python parser; note that regex delimiters are prone to ignoring quoted data. Regex example: '\r\t'
delimiter : str, default None
Alternative delimiter; if specified, the sep parameter is ignored.
delim_whitespace : boolean, default False
Whether to use whitespace (e.g. ' ' or '\t') as the separator, equivalent to setting sep='\s+'. If set to True, the delimiter parameter is ignored.
New in version 0.18.1.
header : int or list of ints, default 'infer'
Row number to use as the column names and the start of the data. Defaults to 0 if the file has column names; set it to None if it does not. Explicitly passing header=0 replaces any existing column names. header can also be a list, e.g. [0,1,3]: those rows become the column headers (meaning each column has multiple header levels) and the rows in between are skipped (in this example row 2; file lines 1, 2, and 4 become multi-level headers, line 3 is discarded, and the DataFrame's data starts at line 5).
Note: if skip_blank_lines=True, header ignores comment lines and blank lines, so header=0 means the first line of data rather than the first line of the file.
names : array-like, default None
List of column names to use for the result; if the data file has no header row, you also need header=None. Duplicates in this list are not allowed unless mangle_dupe_cols=True.
index_col : int or sequence or False, default None
Column number or name to use as the row index; a sequence gives multiple index levels. If the file is malformed with a delimiter at the end of each line, set index_col=False so pandas does not use the first column as the index.
usecols : array-like, default None
Return a subset of the columns. Values must correspond to positions in the file (ints) or to column names (strings), e.g. [0,1,2] or ['foo', 'bar', 'baz']. Using this parameter speeds up loading and reduces memory use.
as_recarray : boolean, default False
Deprecated: this parameter will be removed in a future version; use pd.read_csv(...).to_records() instead.
Returns a NumPy recarray instead of a DataFrame. If True, it takes precedence over the squeeze parameter; the row index is unavailable and index columns are ignored.
squeeze : boolean, default False
If the file contains only one column, return a Series.
prefix : str, default None
Prefix to add to column numbers when there is no header, e.g. 'X' becomes X0, X1, ...
mangle_dupe_cols : boolean, default True
Duplicate columns 'X'...'X' become 'X.0'...'X.N'. If set to False, duplicate names overwrite each other.
dtype : Type name or dict of column -> type, default None
Data type for each column, e.g. {'a': np.float64, 'b': np.int32}
engine : {'c', 'python'}, optional
Parser engine to use. The C engine is faster while the python engine is currently more feature-complete.
converters : dict, default None
Dict of functions for converting values in certain columns; keys can be column names or column indices.
true_values : list, default None
Values to consider as True
false_values : list, default None
Values to consider as False
skipinitialspace : boolean, default False
Skip whitespace after the delimiter (default False, i.e. do not skip).
skiprows : list-like or integer, default None
Number of lines to skip at the start of the file, or a list of line numbers to skip (0-indexed).
skipfooter : int, default 0
Number of lines to skip at the bottom of the file. (Not supported by the C engine.)
skip_footer : int, default 0
Deprecated: use skipfooter instead; same behavior.
nrows : int, default None
Number of rows to read (counted from the start of the file).
na_values : scalar, str, list-like, or dict, default None
Additional values to recognize as NA/NaN. If a dict is passed, it specifies per-column NA values. By default the following are treated as NaN: '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'nan'.
keep_default_na : bool, default True
If na_values is specified and keep_default_na=False, the default NaN values are overridden; otherwise they are extended.
na_filter : boolean, default True
Whether to detect missing values (empty strings or NA values). For a large file with no missing values, setting na_filter=False can speed up reading.
verbose : boolean, default False
Print extra parser output, e.g. the number of NA values in non-numeric columns.
skip_blank_lines : boolean, default True
If True, skip blank lines rather than recording them as NaN.
parse_dates : boolean or list of ints or names or list of lists or dict, default False
  • boolean. True -> parse the index
  • list of ints or names, e.g. [1, 2, 3] -> parse columns 1, 2, 3 each as a separate date column
  • list of lists, e.g. [[1, 3]] -> combine columns 1 and 3 and parse as a single date column
  • dict, e.g. {'foo' : [1, 3]} -> combine columns 1 and 3 and name the resulting column "foo"
infer_datetime_format : boolean, default False
If True and parse_dates is enabled, pandas attempts to infer the datetime format and, if it can, switches to a faster parsing method. In some cases this is 5-10x faster.
keep_date_col : boolean, default False
If combining multiple columns to parse dates, keep the original columns. Default False.
date_parser : function, default None
Function to use for parsing dates; defaults to dateutil.parser.parser. Pandas tries three different ways of calling it, advancing to the next on failure:
1. pass one or more arrays (as specified by parse_dates) as arguments;
2. concatenate the string values of the specified columns into a single array and pass that;
3. call date_parser once per row with one or more strings (as specified by parse_dates) as arguments.
dayfirst : boolean, default False
DD/MM format dates.
iterator : boolean, default False
Return a TextFileReader object for iterating through the file in chunks.
chunksize : int, default None
Size of file chunks. See the IO Tools docs for more information on iterator and chunksize.
compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'
Use a compressed file on disk directly. With 'infer', files whose names end in '.gz', '.bz2', '.zip', or '.xz' are decompressed with gzip, bz2, zip, or xz respectively; otherwise no decompression. A ZIP archive must contain exactly one data file. Set to None for no decompression.
New in version 0.18.1: support for zip and xz decompression.
thousands : str, default None
Thousands separator, e.g. ',' or '.'
decimal : str, default '.'
Character for the decimal point (e.g. ',' for European data).
float_precision : string, default None
Specifies which converter the C engine should use for floating-point values. The options are None for the ordinary converter, high for the high-precision converter, and round_trip for the round-trip converter.
lineterminator : str (length 1), default None
Line separator; C parser only.
quotechar : str (length 1), optional
Character used to mark the start and end of a quoted item; separators inside quotes are ignored.
quoting : int or csv.QUOTE_* instance, default 0
Controls the csv quoting behavior. One of QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3)
doublequote : boolean, default True
When quotechar is specified and quoting is not QUOTE_NONE, two consecutive quotechar characters inside a field are interpreted as a single quotechar element.
escapechar : str (length 1), default None
Character used to escape the delimiter when quoting is QUOTE_NONE.
comment : str, default None
Indicates that the rest of a line should not be parsed. If the character appears at the start of a line, the whole line is ignored. This parameter must be a single character. Fully commented lines, like blank lines (with skip_blank_lines=True), are ignored by header and skiprows. For example, with comment='#', parsing '#empty\na,b,c\n1,2,3' with header=0 returns 'a,b,c' as the header.
encoding : str, default None
Character encoding, usually 'utf-8'. See the list of Python standard encodings.
dialect : str or csv.Dialect instance, default None
csv dialect to use; ignored if sep is longer than one character. See the csv.Dialect documentation for details.
tupleize_cols : boolean, default False
Leave a list of tuples on columns as is (default is to convert to a MultiIndex on the columns)
error_bad_lines : boolean, default True
Lines with too many fields raise an error by default and no DataFrame is returned; if set to False, these "bad lines" are dropped instead. (C parser only.)
warn_bad_lines : boolean, default True
If error_bad_lines=False and warn_bad_lines=True, a warning is printed for each "bad line". (C parser only.)
low_memory : boolean, default True
Internally process the file in chunks to lower memory use while parsing, at the risk of mixed-type inference. To ensure no mixed types, set low_memory=False, or specify the types with the dtype parameter. Note that the whole file is read into a single DataFrame regardless; use chunksize or iterator to actually get the data back in chunks. (C parser only.)
buffer_lines : int, default None
Deprecated: this parameter will be removed in a future version because its value is not respected by the parser.
compact_ints : boolean, default False
Deprecated: this parameter will be removed in a future version.
If compact_ints=True, any column consisting of integers is stored using the smallest integer dtype; whether it is signed depends on the use_unsigned parameter.
use_unsigned : boolean, default False
Deprecated: this parameter will be removed in a future version.
If integer columns are being compacted (i.e. compact_ints=True), specify whether the compacted columns are signed or unsigned.
memory_map : boolean, default False
If a file path is given, memory-map the file and access the data directly from there, avoiding any further I/O.
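
a quick sketch combining a few of these parameters (the file name and the 'date' column are hypothetical):

import pandas as pd

# read only the first 1000 rows, skip two known-bad lines,
# treat '?' as missing, and parse the 'date' column as datetimes
df = pd.read_csv('data.csv', sep=',', header=0, nrows=1000,
                 skiprows=[5, 6], na_values=['?'], parse_dates=['date'])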

python format print example

In [1]:
print 'before: {:.2f} after'.format(1.5555)
before: 1.56 after
In [2]:
print '{1},{0},{1},{2},{0}'.format('pos',777,True) 
777,pos,777,True,pos
In [3]:
print '{name},{age}'.format(age=18,name='cutie')  
cutie,18
In [4]:
has=['first', 2.00, 'third']
print '1st {0[0]} all: {0} last {0[2]} end'.format(has)
1st first all: ['first', 2.0, 'third'] last third end
In [5]:
print 'start--- {:,} ---end'.format(9876543210)
start--- 9,876,543,210 ---end
In [6]:
print 'start:{:>8}'.format(123)
start:     123
In [7]:
print 'start:{:0>8}'.format(123)
start:00000123
In [8]:
print 'start:{:A>8}'.format(123)
start:AAAAA123

understand axes in matplotlib

an axes is a sub-area within a figure() in matplotlib

figure()

plt.axes([0.3, 0.5, 0.4, 0.2])


the figure area is normalized to a 1 × 1 base; plt.axes takes [left, bottom, width, height] as fractions of it

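a minimal sketch of that placement (the numbers are [left, bottom, width, height] fractions of the figure):

import matplotlib.pyplot as plt

plt.figure()
# axes starting 30% from the left and 50% from the bottom,
# spanning 40% of the figure's width and 20% of its height
plt.axes([0.3, 0.5, 0.4, 0.2])
plt.plot([0, 1], [0, 1])
plt.show()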

In [2]:
import matplotlib.pyplot as plt
import numpy as np
In [6]:
physical_sciences=[ 13.8,  14.9,  14.8,  16.5,  18.2,  19.1,  20. ,  21.3,  22.5,
        23.7,  24.6,  25.7,  27.3,  27.6,  28. ,  27.5,  28.4,  30.4,
        29.7,  31.3,  31.6,  32.6,  32.6,  33.6,  34.8,  35.9,  37.3,
        38.3,  39.7,  40.2,  41. ,  42.2,  41.1,  41.7,  42.1,  41.6,
        40.8,  40.7,  40.7,  40.7,  40.2,  40.1];
computer_science=[ 13.6,  13.6,  14.9,  16.4,  18.9,  19.8,  23.9,  25.7,  28.1,
        30.2,  32.5,  34.8,  36.3,  37.1,  36.8,  35.7,  34.7,  32.4,
        30.8,  29.9,  29.4,  28.7,  28.2,  28.5,  28.5,  27.5,  27.1,
        26.8,  27. ,  28.1,  27.7,  27.6,  27. ,  25.1,  22.2,  20.6,
        18.6,  17.6,  17.8,  18.1,  17.6,  18.2]
year=[1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980,
       1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991,
       1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
       2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011]

two methods to give each line its own plot: plt.axes() and plt.subplot()

In [10]:
# Plot in blue the % of degrees awarded to women in the Physical Sciences
plt.plot(year, physical_sciences, color='blue')

# Plot in red the % of degrees awarded to women in Computer Science
plt.plot(year, computer_science, color='red')

# Display the plot
plt.show()
In [8]:
# Create plot axes for the first line plot
plt.axes([0.05,0.05,0.425,0.9])

# Plot in blue the % of degrees awarded to women in the Physical Sciences
plt.plot(year,physical_sciences, color='blue')

# Create plot axes for the second line plot
plt.axes([.525,0.05,0.425,0.9])


# Plot in red the % of degrees awarded to women in Computer Science
plt.plot(year,computer_science, color='red')


# Display the plot
plt.show()
In [9]:
# Create a figure with 1x2 subplot and make the left subplot active
plt.subplot(1,2,1)

# Plot in blue the % of degrees awarded to women in the Physical Sciences
plt.plot(year, physical_sciences, color='blue')
plt.title('Physical Sciences')

# Make the right subplot active in the current 1x2 subplot grid
plt.subplot(1,2,2)


# Plot in red the % of degrees awarded to women in Computer Science
plt.plot(year, computer_science, color='red')
plt.title('Computer Science')

# Use plt.tight_layout() to improve the spacing between subplots
plt.tight_layout()
plt.show()

Jupyter notebook into post

download the Jupyter notebook as HTML,

add that HTML source code to XYZ Html,

and embed it into the post, like:
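
downloading as HTML works from the notebook menu (File → Download as → HTML) or from the command line (notebook name hypothetical):

$ jupyter nbconvert --to html mynotebook.ipynb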

In [1]:
import numpy as np

from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook

N = 4000
x = np.random.random(size=N) * 100
y = np.random.random(size=N) * 100
radii = np.random.random(size=N) * 1.5
colors = ["#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50+2*x, 30+2*y)]
In [2]:
output_notebook()
Loading BokehJS ...
In [3]:
p = figure()

p.scatter(x, y, radius=radii,
          fill_color=colors, fill_alpha=0.6,
          line_color=None)

# output_file("color_scatter.html", title="color_scatter.py example")

show(p)  # open a browser
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from time import time

%matplotlib inline
plt.rcParams['figure.figsize'] = (1.5, 1.5) # set default size of plots
# plt.rcParams['image.interpolation'] = 'nearest'
# plt.rcParams['image.cmap'] = 'gray'
In [2]:
from sklearn.decomposition import PCA, RandomizedPCA, randomized_svd
from sklearn.cluster import KMeans
from sklearn.manifold import Isomap
from sklearn.model_selection import train_test_split, KFold
In [3]:
train=pd.read_csv('train.csv')
test=pd.read_csv('test.csv')
train.shape,test.shape
Out[3]:
((42000, 785), (28000, 784))
In [4]:
train.head(1)
Out[4]:
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

1 rows × 785 columns

In [5]:
test.head(1)
Out[5]:
pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

1 rows × 784 columns

In [9]:
label=train.pop('label')
In [7]:
def fuckpca(train,test,n):
    """Fit an n-component whitened PCA on train, then transform train and test."""
    start=time()
    pca=PCA(n_components=n,whiten=True)
    train=pca.fit_transform(train)
    test=pca.transform(test)
    print 'used {:.2f}s'.format(time()-start)
    return train,test
In [8]:
train_pca,test_pca=fuckpca(train,test,36)
used 4.97s
In [9]:
train_pca.shape,test_pca.shape
Out[9]:
((42000L, 36L), (28000L, 36L))
In [10]:
plt.imshow(train_pca[3].reshape(6,-1))
Out[10]:
<matplotlib.image.AxesImage at 0x1b99b358>
In [11]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
In [12]:
model=GradientBoostingClassifier(verbose=1,n_estimators=300)
model
Out[12]:
GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_split=1e-07, min_samples_leaf=1,
              min_samples_split=2, min_weight_fraction_leaf=0.0,
              n_estimators=300, presort='auto', random_state=None,
              subsample=1.0, verbose=1, warm_start=False)
In [99]:
start=time()
model.fit(train_pca,label)
print 'used {:.2f}s'.format(time()-start)
      Iter       Train Loss   Remaining Time 
         1       79940.9682            6.62m
         2       70830.7802            6.62m
         3       64173.7553            6.61m
         4       59037.7864            6.58m
         5       54599.3908            6.58m
         6       50886.2372            6.57m
         7       47761.6971            6.55m
         8       45040.9071            6.54m
         9       42543.2014            6.55m
        10       40319.8099            6.56m
        20       27069.1592            6.42m
        30       20786.5203            6.28m
        40       17051.2103            6.03m
        50       14576.1295            5.82m
        60       12778.5881            5.59m
        70       11447.6600            5.35m
        80       10375.5170            5.12m
        90        9518.4201            4.88m
       100        8816.3706            4.64m
       200        5027.3031            2.26m
       300        3355.2391            0.00s
used 405.63s
In [100]:
result=model.predict(test_pca)
In [13]:
def save():
    # note: relies on the global `result` predicted in an earlier cell
    import numpy as np
    submit=pd.DataFrame({'ImageId':np.arange(1,len(result)+1),'Label':result})
    submit.to_csv('gbc.csv',index=False)
In [14]:
# sub=pd.concat([pd.Series(np.arange(1,len(result)+1)),pd.Series(result)],axis=1)
# sub.columns=['ImageId','Label']
In [108]:
model.score(after,label)
Out[108]:
0.9878095238095238
In [115]:
tr=model.predict(after)
In [128]:
(tr==label).sum()/float(label.shape[0])
Out[128]:
0.9878095238095238
In [129]:
from sklearn.metrics import confusion_matrix
In [4]:
from sklearn.svm import SVC
In [5]:
svc=SVC(verbose=1)
svc
Out[5]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=1)
In [7]:
train_pca=train
test_pca=test
In [10]:
start=time()
svc.fit(train_pca,label)
print 'used {:.2f}s'.format(time()-start)
[LibSVM]used 4429.63s
In [19]:
result=svc.predict(test_pca)
In [21]:
save()
In [17]:
from sklearn import svm,datasets
from sklearn.model_selection import GridSearchCV
In [13]:
iris = datasets.load_iris()
iris.data[0:4]
Out[13]:
array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2]])
In [14]:
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 5, 10]}
In [15]:
model = svm.SVC()
In [18]:
classifier =GridSearchCV(model, parameters)
In [19]:
classifier.fit(iris.data, iris.target)
Out[19]:
GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'kernel': ('linear', 'rbf'), 'C': [1, 5, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)
In [20]:
classifier.best_params_
Out[20]:
{'C': 1, 'kernel': 'linear'}
In [37]:
# classifier.cv_results_
In [38]:
classifier.best_estimator_
Out[38]:
SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
In [24]:
import scipy
In [33]:
print scipy.stats.expon(scale=100)
<scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000008E57E10>
In [39]:
parameter_dist = {
  'C': scipy.stats.expon(scale=100),
  'kernel': ['linear'],
  'gamma': scipy.stats.expon(scale=.1),
}
In [119]:
from sklearn.model_selection import RandomizedSearchCV  # grid_search was never imported above
classifier = RandomizedSearchCV(model, parameter_dist)
classifier.fit(iris.data, iris.target)
Out[119]:
RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params={}, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'kernel': ['linear'], 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000008E79FD0>, 'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000000008E70080>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          scoring=None, verbose=0)
In [120]:
classifier.best_params_, classifier.best_score_
Out[120]:
({'C': 1.3991944739478859, 'gamma': 0.0022802232812657304, 'kernel': 'linear'},
 0.9933333333333333)
In [63]:
wtf=scipy.stats.expon(scale=10)
In [66]:
print wtf.rvs(5)
[ 10.29516445   1.53962143   2.67578885   0.21101641   1.31133069]

Python odds and ends: scope, filter, reduce

description                 code                            comments
quickly assign values       a,b,c = (3,7,12)                unpack
nested functions            outer func returns inner func

Python's built-in scope

  • check out Python's built-in scope, which is really just a built-in module called builtins
  • to query builtins, you'll need to import builtins
In [16]:
import builtins
print dir(builtins)
['ArithmeticError', 'AssertionError', 'AttributeError', 'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning', 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError', 'None', 'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError', 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning', 'WindowsError', 'ZeroDivisionError', '__builtins__', '__doc__', '__file__', '__future_module__', '__name__', '__package__', '__path__', 'abs', 'absolute_import', 'all', 'any', 'apply', 'ascii', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod', 'dreload', 'enumerate', 'eval', 'execfile', 'file', 'filter', 'float', 'format', 'frozenset', 'get_ipython', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'sys', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip']

nested functions

  • returns the inner function
In [22]:
def raise_val(n):
    
    """Return the inner function."""
    def inner(x):
        """Raise x to the power of n."""
        raised = x ** n
        return raised
    
    return inner
In [25]:
square = raise_val(2)
cube =  raise_val(3)

print square(6), cube(6)
36 216

pass parameters

In [30]:
raise_val(4)(3)
Out[30]:
81

scopes searched, in order (LEGB)

  • Local scope
  • Enclosing functions
  • Global
  • Built-in
In [44]:
n=3

def outer():
    """Prints the value of n."""
    n = 1
    def inner():
        n = 2
        print(n)
        
    inner()
    print(n)
In [45]:
outer()
2
1

nested functions and closures

  • nesting functions gives you a closure
  • this means that the nested (inner) function remembers the state of its enclosing scope when called
  • thus, anything defined locally in the enclosing scope is available to the inner function even after the outer function has finished executing
In [46]:
# Define echo
def echo(n):
    """Return the inner_echo function."""

    # Define inner_echo
    def inner_echo(word1):
        """Concatenate n copies of word1."""
        echo_word = word1 * n
        return echo_word

    # Return inner_echo
    return inner_echo

# Call echo: twice
twice = echo(2)

# Call echo: thrice
thrice = echo(3)

# Call twice() and thrice() then print
print(twice('hello'), thrice('hello'))
('hellohello', 'hellohellohello')
In [48]:
echo(7)('wtf ')
Out[48]:
'wtf wtf wtf wtf wtf wtf wtf '

flexible arguments

Function with variable-length arguments (*args)

In [51]:
# Define gibberish
def gibberish(*notmatter):
    """Concatenate strings in *args together."""

    # Initialize an empty string: hodgepodge
    hodgepodge = ''

    # Concatenate the strings in args
    for word in notmatter:
        hodgepodge += word+ ' '

    # Return hodgepodge
    return hodgepodge

# Call gibberish() with one string: one_word
one_word = gibberish("luke")

# Call gibberish() with five strings: many_words
many_words = gibberish("luke", "leia", "han", "obi", "darth")

# Print one_word and many_words
print(one_word)
print(many_words)
luke 
luke leia han obi darth 

Function with variable-length keyword arguments (**kwargs)

In [55]:
# Define report_status
def report_status(**whatevername):
    """Print out the status of a movie character."""

    print("\nBEGIN: REPORT\n")

    print whatevername
    print 
    # Print a formatted status report
    for key, value in whatevername.items():
        print(key + ": " + value)

    print("\nEND REPORT")

# First call to report_status()
report_status(name="luke", affiliation="jedi", status="missing")

# Second call to report_status()
report_status(name="anakin", affiliation="sith lord", status="deceased")
BEGIN: REPORT

{'status': 'missing', 'affiliation': 'jedi', 'name': 'luke'}

status: missing
affiliation: jedi
name: luke

END REPORT

BEGIN: REPORT

{'status': 'deceased', 'affiliation': 'sith lord', 'name': 'anakin'}

status: deceased
affiliation: sith lord
name: anakin

END REPORT

Map() and lambda functions

In [56]:
# Create a list of strings: spells
spells = ['protego', 'accio', 'expecto patronum', 'legilimens']

# Use map() to apply a lambda function over spells: shout_spells
shout_spells = map(lambda item: item + '!!!', spells)

# Convert shout_spells to a list: shout_spells_list
shout_spells_list = list(shout_spells)

# Convert shout_spells into a list and print it
print(shout_spells_list)
['protego!!!', 'accio!!!', 'expecto patronum!!!', 'legilimens!!!']

Filter() and lambda functions

The filter() function offers a way to filter out the elements of a list that don't satisfy certain criteria.

  • filter(function or None, sequence) -> list, tuple, or string

  • Return those items of sequence for which function(item) is true. If function is None, return the items that are true. If sequence is a tuple or string, return the same type, else return a list.

In [59]:
# Create a list of strings: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Use filter() to apply a lambda function over fellowship: result
result = filter(lambda member: len(member) > 6, fellowship)


# Convert result to a list: result_list
result_list = list(result)

# Convert result into a list and print it
print(result_list)
['samwise', 'aragorn', 'legolas', 'boromir']
In [69]:
filter(lambda member: len(member) >3, ['1234','234','34567'])
Out[69]:
['1234', '34567']
In [83]:
filter(None, [12>1, 'wtf' if 2>1 else 0, 'aiya' if 3>2 else 7, 'momomo' if 4>5 else -44])
Out[83]:
[True, 'wtf', 'aiya', -44]

Reduce() and lambda functions

The reduce() function is useful for performing some computation on a list and, unlike map() and filter(), returns a single value as a result.

To use reduce(), you must import it from the functools module.

  • reduce(function, sequence[, initial]) -> value
  • Apply a function of two arguments cumulatively to the items of a sequence, from left to right, so as to reduce the sequence to a single value.
  • For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates ((((1+2)+3)+4)+5).
  • If initial is present, it is placed before the items of the sequence in the calculation, and serves as a default when the sequence is empty.
In [86]:
# Import reduce from functools
from functools import reduce

# Create a list of strings: stark
stark = ['robb', 'sansa', 'arya', 'eddard', 'jon']

# Use reduce() to apply a lambda function over stark: result
result = reduce(lambda item1, item2: item1 +' '+ item2, stark)

# Print the result
print(result)
robb sansa arya eddard jon

error handling

  • raise error
In [88]:
try: '3'+3
except Exception, e: print e
cannot concatenate 'str' and 'int' objects
In [91]:
# Define shout_echo
def shout_echo(word1, echo=1):
    """Concatenate echo copies of word1 and three
    exclamation marks at the end of the string."""

    # Raise an error with raise
    if echo < 0:
        raise ValueError('echo must be greater than 0')

    # Concatenate echo copies of word1 using *: echo_word
    echo_word = word1 * echo

    # Concatenate '!!!' to echo_word: shout_word
    shout_word = echo_word + '!!!'

    # Return shout_word
    return shout_word

# Call shout_echo
try:
    shout_echo("particle", echo=-3)
except Exception, e: 
    print e
echo must be greater than 0
In [96]:
shout_echo("123", echo=-1)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-96-44c64dc8c830> in <module>()
----> 1 shout_echo("123", echo=-1)

<ipython-input-91-d6d66ed4753d> in shout_echo(word1, echo)
      6     # Raise an error with raise
      7     if echo < 0:
----> 8         raise ValueError('echo must be greater than 0')
      9 
     10     # Concatenate echo copies of word1 using *: echo_word

ValueError: echo must be greater than 0

use filter(lambda x: ...) in pandas

In [98]:
# Select retweets from the Twitter dataframe: result

result = filter(lambda x: x[0:2] == 'RT', df['text'])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-98-ac4d1cb6d465> in <module>()
      1 # Select retweets from the Twitter dataframe: result
      2 
----> 3 result = filter(lambda x: x[0:2] == 'RT', df['text'])

NameError: name 'df' is not defined
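The NameError above is just because df was never defined in that session. A self-contained sketch, with a toy DataFrame standing in for the Twitter data (the 'text' column name follows the original):

import pandas as pd

df = pd.DataFrame({'text': ['RT hello', 'just a tweet', 'RT again']})

# keep only the tweets whose text starts with 'RT'
result = filter(lambda x: x[0:2] == 'RT', df['text'])
print(list(result))
# ['RT hello', 'RT again']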